krish-emissary committed
Commit 0d098bc · verified · 1 parent: ad58bcd

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc +0 -0
  2. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc +0 -0
  3. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc +0 -0
  4. emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json +0 -0
  5. emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json +30 -0
  6. emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json +0 -0
  7. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py +130 -0
  8. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py +608 -0
  9. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py +459 -0
  10. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth +1 -0
  11. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py +18 -0
  12. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py +342 -0
  13. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py +348 -0
  14. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py +7 -0
  15. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py +219 -0
  16. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py +304 -0
  17. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv +0 -0
  18. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth +1 -0
  19. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py +161 -0
  20. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py +141 -0
  21. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py +18 -0
  22. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py +413 -0
  23. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd +1 -0
  24. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd +1 -0
  25. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README +76 -0
  26. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py +452 -0
  27. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py +753 -0
  28. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py +666 -0
  29. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py +549 -0
  30. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py +316 -0
  31. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py +62 -0
  32. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py +590 -0
  33. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py +728 -0
  34. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py +693 -0
  35. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py +1951 -0
  36. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py +856 -0
  37. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so +0 -0
  38. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py +278 -0
  39. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py +732 -0
  40. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so +0 -0
  41. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py +475 -0
  42. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py +158 -0
  43. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py +543 -0
  44. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py +896 -0
  45. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py +716 -0
  46. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py +1434 -0
  47. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py +440 -0
  48. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py +1126 -0
  49. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py +572 -0
  50. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py +661 -0
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc ADDED
Binary file (9.62 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc ADDED
Binary file (5.6 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc ADDED
Binary file (2.48 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json ADDED
The diff for this file is too large to render. See the raw diff.
 
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
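
For context, a minimal sketch of how a checkpoint directory containing this special_tokens_map.json is typically consumed, assuming the Hugging Face transformers package; the local path below is illustrative and stands in for the outputs/ directory added in this commit.

    from transformers import AutoTokenizer

    # Load the tokenizer artifacts committed above (path is illustrative).
    tok = AutoTokenizer.from_pretrained("emissary-ml/llm-scripts/fine-tuning/llama3/outputs")

    # Per the map above: bos=<s>, eos=</s>, pad reuses </s>, unk=<unk>.
    print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)

Reusing the eos token as pad_token is a common choice when fine-tuning Llama-style models, whose base tokenizers ship without a dedicated padding token.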
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json ADDED
The diff for this file is too large to render. See the raw diff.
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py ADDED
@@ -0,0 +1,130 @@
+ """Patches that are applied at runtime to the virtual environment"""
+ # -*- coding: utf-8 -*-
+
+ import os
+ import sys
+
+ VIRTUALENV_PATCH_FILE = os.path.join(__file__)
+
+
+ def patch_dist(dist):
+     """
+     Distutils allows user to configure some arguments via a configuration file:
+     https://docs.python.org/3/install/index.html#distutils-configuration-files
+
+     Some of this arguments though don't make sense in context of the virtual environment files, let's fix them up.
+     """
+     # we cannot allow some install config as that would get packages installed outside of the virtual environment
+     old_parse_config_files = dist.Distribution.parse_config_files
+
+     def parse_config_files(self, *args, **kwargs):
+         result = old_parse_config_files(self, *args, **kwargs)
+         install = self.get_option_dict("install")
+
+         if "prefix" in install:  # the prefix governs where to install the libraries
+             install["prefix"] = VIRTUALENV_PATCH_FILE, os.path.abspath(sys.prefix)
+         for base in ("purelib", "platlib", "headers", "scripts", "data"):
+             key = "install_{}".format(base)
+             if key in install:  # do not allow global configs to hijack venv paths
+                 install.pop(key, None)
+         return result
+
+     dist.Distribution.parse_config_files = parse_config_files
+
+
+ # Import hook that patches some modules to ignore configuration values that break package installation in case
+ # of virtual environments.
+ _DISTUTILS_PATCH = "distutils.dist", "setuptools.dist"
+ if sys.version_info > (3, 4):
+     # https://docs.python.org/3/library/importlib.html#setting-up-an-importer
+     from functools import partial
+     from importlib.abc import MetaPathFinder
+     from importlib.util import find_spec
+
+     class _Finder(MetaPathFinder):
+         """A meta path finder that allows patching the imported distutils modules"""
+
+         fullname = None
+
+         # lock[0] is threading.Lock(), but initialized lazily to avoid importing threading very early at startup,
+         # because there are gevent-based applications that need to be first to import threading by themselves.
+         # See https://github.com/pypa/virtualenv/issues/1895 for details.
+         lock = []
+
+         def find_spec(self, fullname, path, target=None):
+             if fullname in _DISTUTILS_PATCH and self.fullname is None:
+                 # initialize lock[0] lazily
+                 if len(self.lock) == 0:
+                     import threading
+
+                     lock = threading.Lock()
+                     # there is possibility that two threads T1 and T2 are simultaneously running into find_spec,
+                     # observing .lock as empty, and further going into hereby initialization. However due to the GIL,
+                     # list.append() operation is atomic and this way only one of the threads will "win" to put the lock
+                     # - that every thread will use - into .lock[0].
+                     # https://docs.python.org/3/faq/library.html#what-kinds-of-global-value-mutation-are-thread-safe
+                     self.lock.append(lock)
+
+                 with self.lock[0]:
+                     self.fullname = fullname
+                     try:
+                         spec = find_spec(fullname, path)
+                         if spec is not None:
+                             # https://www.python.org/dev/peps/pep-0451/#how-loading-will-work
+                             is_new_api = hasattr(spec.loader, "exec_module")
+                             func_name = "exec_module" if is_new_api else "load_module"
+                             old = getattr(spec.loader, func_name)
+                             func = self.exec_module if is_new_api else self.load_module
+                             if old is not func:
+                                 try:
+                                     setattr(spec.loader, func_name, partial(func, old))
+                                 except AttributeError:
+                                     pass  # C-Extension loaders are r/o such as zipimporter with <python 3.7
+                         return spec
+                     finally:
+                         self.fullname = None
+
+         @staticmethod
+         def exec_module(old, module):
+             old(module)
+             if module.__name__ in _DISTUTILS_PATCH:
+                 patch_dist(module)
+
+         @staticmethod
+         def load_module(old, name):
+             module = old(name)
+             if module.__name__ in _DISTUTILS_PATCH:
+                 patch_dist(module)
+             return module
+
+     sys.meta_path.insert(0, _Finder())
+ else:
+     # https://www.python.org/dev/peps/pep-0302/
+     from imp import find_module
+     from pkgutil import ImpImporter, ImpLoader
+
+     class _VirtualenvImporter(object, ImpImporter):
+         def __init__(self, path=None):
+             object.__init__(self)
+             ImpImporter.__init__(self, path)
+
+         def find_module(self, fullname, path=None):
+             if fullname in _DISTUTILS_PATCH:
+                 try:
+                     return _VirtualenvLoader(fullname, *find_module(fullname.split(".")[-1], path))
+                 except ImportError:
+                     pass
+             return None
+
+     class _VirtualenvLoader(object, ImpLoader):
+         def __init__(self, fullname, file, filename, etc):
+             object.__init__(self)
+             ImpLoader.__init__(self, fullname, file, filename, etc)
+
+         def load_module(self, fullname):
+             module = super(_VirtualenvLoader, self).load_module(fullname)
+             patch_dist(module)
+             module.__loader__ = None  # distlib fallback
+             return module
+
+     sys.meta_path.append(_VirtualenvImporter())
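
For context, a minimal sketch of the meta-path-finder mechanism _virtualenv.py builds on, assuming only the standard library; ImportLogger is a hypothetical name, not part of the vendored file. A finder inserted at the front of sys.meta_path sees every import first, and returning None from find_spec defers to the remaining finders, which is how _Finder above intercepts distutils.dist and setuptools.dist without loading them itself.

    import sys
    from importlib.abc import MetaPathFinder

    class ImportLogger(MetaPathFinder):
        def find_spec(self, fullname, path, target=None):
            print("importing:", fullname)  # observe every import attempt
            return None  # defer to the remaining finders on sys.meta_path

    sys.meta_path.insert(0, ImportLogger())
    import json  # on first import, prints "importing: json" (plus its dependencies)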
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py ADDED
@@ -0,0 +1,608 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # Copyright (c) 2005-2010 ActiveState Software Inc.
+ # Copyright (c) 2013 Eddy Petrișor
+
+ """Utilities for determining application-specific dirs.
+
+ See <http://github.com/ActiveState/appdirs> for details and usage.
+ """
+ # Dev Notes:
+ # - MSDN on where to store app data files:
+ #   http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
+ # - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
+ # - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+
+ __version__ = "1.4.4"
+ __version_info__ = tuple(int(segment) for segment in __version__.split("."))
+
+
+ import sys
+ import os
+
+ PY3 = sys.version_info[0] == 3
+
+ if PY3:
+     unicode = str
+
+ if sys.platform.startswith('java'):
+     import platform
+     os_name = platform.java_ver()[3][0]
+     if os_name.startswith('Windows'):  # "Windows XP", "Windows 7", etc.
+         system = 'win32'
+     elif os_name.startswith('Mac'):  # "Mac OS X", etc.
+         system = 'darwin'
+     else:  # "Linux", "SunOS", "FreeBSD", etc.
+         # Setting this to "linux2" is not ideal, but only Windows or Mac
+         # are actually checked for and the rest of the module expects
+         # *sys.platform* style strings.
+         system = 'linux2'
+ else:
+     system = sys.platform
+
+
+
+ def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user data directories are:
+         Mac OS X:               ~/Library/Application Support/<AppName>
+         Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
+         Win XP (not roaming):   C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
+         Win XP (roaming):       C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
+         Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
+         Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
+
+     For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
+     That means, by default "~/.local/share/<AppName>".
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+         path = os.path.normpath(_get_win_folder(const))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+     elif system == 'darwin':
+         path = os.path.expanduser('~/Library/Application Support/')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
+     r"""Return full path to the user-shared data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "multipath" is an optional parameter only applicable to *nix
+             which indicates that the entire list of data dirs should be
+             returned. By default, the first item from XDG_DATA_DIRS is
+             returned, or '/usr/local/share/<AppName>',
+             if XDG_DATA_DIRS is not set
+
+     Typical site data directories are:
+         Mac OS X:   /Library/Application Support/<AppName>
+         Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
+         Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
+         Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+         Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.
+
+     For Unix, this is using the $XDG_DATA_DIRS[0] default.
+
+     WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+     elif system == 'darwin':
+         path = os.path.expanduser('/Library/Application Support')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         # XDG default for $XDG_DATA_DIRS
+         # only first, if multipath is False
+         path = os.getenv('XDG_DATA_DIRS',
+                          os.pathsep.join(['/usr/local/share', '/usr/share']))
+         pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+         if appname:
+             if version:
+                 appname = os.path.join(appname, version)
+             pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+         if multipath:
+             path = os.pathsep.join(pathlist)
+         else:
+             path = pathlist[0]
+         return path
+
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific config dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user config directories are:
+         Mac OS X:   same as user_data_dir
+         Unix:       ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
+         Win *:      same as user_data_dir
+
+     For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
+     That means, by default "~/.config/<AppName>".
+     """
+     if system in ["win32", "darwin"]:
+         path = user_data_dir(appname, appauthor, None, roaming)
+     else:
+         path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
+     r"""Return full path to the user-shared data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "multipath" is an optional parameter only applicable to *nix
+             which indicates that the entire list of config dirs should be
+             returned. By default, the first item from XDG_CONFIG_DIRS is
+             returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
+
+     Typical site config directories are:
+         Mac OS X:   same as site_data_dir
+         Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
+                     $XDG_CONFIG_DIRS
+         Win *:      same as site_data_dir
+         Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+
+     For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
+
+     WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+     """
+     if system in ["win32", "darwin"]:
+         path = site_data_dir(appname, appauthor)
+         if appname and version:
+             path = os.path.join(path, version)
+     else:
+         # XDG default for $XDG_CONFIG_DIRS
+         # only first, if multipath is False
+         path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
+         pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+         if appname:
+             if version:
+                 appname = os.path.join(appname, version)
+             pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+         if multipath:
+             path = os.pathsep.join(pathlist)
+         else:
+             path = pathlist[0]
+     return path
+
+
+ def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
+     r"""Return full path to the user-specific cache dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "opinion" (boolean) can be False to disable the appending of
+             "Cache" to the base app data dir for Windows. See
+             discussion below.
+
+     Typical user cache directories are:
+         Mac OS X:   ~/Library/Caches/<AppName>
+         Unix:       ~/.cache/<AppName> (XDG default)
+         Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
+         Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
+
+     On Windows the only suggestion in the MSDN docs is that local settings go in
+     the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
+     app data dir (the default returned by `user_data_dir` above). Apps typically
+     put cache data somewhere *under* the given dir here. Some examples:
+         ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+         ...\Acme\SuperApp\Cache\1.0
+     OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+     This can be disabled with the `opinion=False` option.
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+             if opinion:
+                 path = os.path.join(path, "Cache")
+     elif system == 'darwin':
+         path = os.path.expanduser('~/Library/Caches')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific state dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user state directories are:
+         Mac OS X:   same as user_data_dir
+         Unix:       ~/.local/state/<AppName>    # or in $XDG_STATE_HOME, if defined
+         Win *:      same as user_data_dir
+
+     For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
+     to extend the XDG spec and support $XDG_STATE_HOME.
+
+     That means, by default "~/.local/state/<AppName>".
+     """
+     if system in ["win32", "darwin"]:
+         path = user_data_dir(appname, appauthor, None, roaming)
+     else:
+         path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
+     r"""Return full path to the user-specific log dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "opinion" (boolean) can be False to disable the appending of
+             "Logs" to the base app data dir for Windows, and "log" to the
+             base cache dir for Unix. See discussion below.
+
+     Typical user log directories are:
+         Mac OS X:   ~/Library/Logs/<AppName>
+         Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
+         Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
+         Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
+
+     On Windows the only suggestion in the MSDN docs is that local settings
+     go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
+     examples of what some windows apps use for a logs dir.)
+
+     OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
+     value for Windows and appends "log" to the user cache dir for Unix.
+     This can be disabled with the `opinion=False` option.
+     """
+     if system == "darwin":
+         path = os.path.join(
+             os.path.expanduser('~/Library/Logs'),
+             appname)
+     elif system == "win32":
+         path = user_data_dir(appname, appauthor, version)
+         version = False
+         if opinion:
+             path = os.path.join(path, "Logs")
+     else:
+         path = user_cache_dir(appname, appauthor, version)
+         version = False
+         if opinion:
+             path = os.path.join(path, "log")
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ class AppDirs(object):
+     """Convenience wrapper for getting application dirs."""
+     def __init__(self, appname=None, appauthor=None, version=None,
+                  roaming=False, multipath=False):
+         self.appname = appname
+         self.appauthor = appauthor
+         self.version = version
+         self.roaming = roaming
+         self.multipath = multipath
+
+     @property
+     def user_data_dir(self):
+         return user_data_dir(self.appname, self.appauthor,
+                              version=self.version, roaming=self.roaming)
+
+     @property
+     def site_data_dir(self):
+         return site_data_dir(self.appname, self.appauthor,
+                              version=self.version, multipath=self.multipath)
+
+     @property
+     def user_config_dir(self):
+         return user_config_dir(self.appname, self.appauthor,
+                                version=self.version, roaming=self.roaming)
+
+     @property
+     def site_config_dir(self):
+         return site_config_dir(self.appname, self.appauthor,
+                                version=self.version, multipath=self.multipath)
+
+     @property
+     def user_cache_dir(self):
+         return user_cache_dir(self.appname, self.appauthor,
+                               version=self.version)
+
+     @property
+     def user_state_dir(self):
+         return user_state_dir(self.appname, self.appauthor,
+                               version=self.version)
+
+     @property
+     def user_log_dir(self):
+         return user_log_dir(self.appname, self.appauthor,
+                             version=self.version)
+
+
+ #---- internal support stuff
+
+ def _get_win_folder_from_registry(csidl_name):
+     """This is a fallback technique at best. I'm not sure if using the
+     registry for this guarantees us the correct answer for all CSIDL_*
+     names.
+     """
+     if PY3:
+         import winreg as _winreg
+     else:
+         import _winreg
+
+     shell_folder_name = {
+         "CSIDL_APPDATA": "AppData",
+         "CSIDL_COMMON_APPDATA": "Common AppData",
+         "CSIDL_LOCAL_APPDATA": "Local AppData",
+     }[csidl_name]
+
+     key = _winreg.OpenKey(
+         _winreg.HKEY_CURRENT_USER,
+         r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+     )
+     dir, type = _winreg.QueryValueEx(key, shell_folder_name)
+     return dir
+
+
+ def _get_win_folder_with_pywin32(csidl_name):
+     from win32com.shell import shellcon, shell
+     dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
+     # Try to make this a unicode path because SHGetFolderPath does
+     # not return unicode strings when there is unicode data in the
+     # path.
+     try:
+         dir = unicode(dir)
+
+         # Downgrade to short path name if have highbit chars. See
+         # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+         has_high_char = False
+         for c in dir:
+             if ord(c) > 255:
+                 has_high_char = True
+                 break
+         if has_high_char:
+             try:
+                 import win32api
+                 dir = win32api.GetShortPathName(dir)
+             except ImportError:
+                 pass
+     except UnicodeError:
+         pass
+     return dir
+
+
+ def _get_win_folder_with_ctypes(csidl_name):
+     import ctypes
+
+     csidl_const = {
+         "CSIDL_APPDATA": 26,
+         "CSIDL_COMMON_APPDATA": 35,
+         "CSIDL_LOCAL_APPDATA": 28,
+     }[csidl_name]
+
+     buf = ctypes.create_unicode_buffer(1024)
+     ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+
+     # Downgrade to short path name if have highbit chars. See
+     # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+     has_high_char = False
+     for c in buf:
+         if ord(c) > 255:
+             has_high_char = True
+             break
+     if has_high_char:
+         buf2 = ctypes.create_unicode_buffer(1024)
+         if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+             buf = buf2
+
+     return buf.value
+
+ def _get_win_folder_with_jna(csidl_name):
+     import array
+     from com.sun import jna
+     from com.sun.jna.platform import win32
+
+     buf_size = win32.WinDef.MAX_PATH * 2
+     buf = array.zeros('c', buf_size)
+     shell = win32.Shell32.INSTANCE
+     shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
+     dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+     # Downgrade to short path name if have highbit chars. See
+     # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+     has_high_char = False
+     for c in dir:
+         if ord(c) > 255:
+             has_high_char = True
+             break
+     if has_high_char:
+         buf = array.zeros('c', buf_size)
+         kernel = win32.Kernel32.INSTANCE
+         if kernel.GetShortPathName(dir, buf, buf_size):
+             dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+     return dir
+
+ if system == "win32":
+     try:
+         import win32com.shell
+         _get_win_folder = _get_win_folder_with_pywin32
+     except ImportError:
+         try:
+             from ctypes import windll
+             _get_win_folder = _get_win_folder_with_ctypes
+         except ImportError:
+             try:
+                 import com.sun.jna
+                 _get_win_folder = _get_win_folder_with_jna
+             except ImportError:
+                 _get_win_folder = _get_win_folder_from_registry
+
+
+ #---- self test code
+
+ if __name__ == "__main__":
+     appname = "MyApp"
+     appauthor = "MyCompany"
+
+     props = ("user_data_dir",
+              "user_config_dir",
+              "user_cache_dir",
+              "user_state_dir",
+              "user_log_dir",
+              "site_data_dir",
+              "site_config_dir")
+
+     print("-- app dirs %s --" % __version__)
+
+     print("-- app dirs (with optional 'version')")
+     dirs = AppDirs(appname, appauthor, version="1.0")
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (without optional 'version')")
+     dirs = AppDirs(appname, appauthor)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (without optional 'appauthor')")
+     dirs = AppDirs(appname)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (with disabled 'appauthor')")
+     dirs = AppDirs(appname, appauthor=False)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
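
For context, a minimal sketch of the XDG lookup implemented in the Unix branches above, assuming the module is importable as appdirs; the /tmp path is illustrative. user_data_dir reads $XDG_DATA_HOME at call time, so the override takes effect even when set after import:

    import os
    import appdirs

    os.environ["XDG_DATA_HOME"] = "/tmp/xdg-demo"  # illustrative override
    print(appdirs.user_data_dir("MyApp"))  # -> /tmp/xdg-demo/MyApp on Linux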
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py ADDED
@@ -0,0 +1,459 @@
+ # #########################     LICENSE     ############################ #
+
+ # Copyright (c) 2005-2025, Michele Simionato
+ # All rights reserved.
+
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are
+ # met:
+
+ #   Redistributions of source code must retain the above copyright
+ #   notice, this list of conditions and the following disclaimer.
+ #   Redistributions in bytecode form must reproduce the above copyright
+ #   notice, this list of conditions and the following disclaimer in
+ #   the documentation and/or other materials provided with the
+ #   distribution.
+
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ # HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ # DAMAGE.
+
+ """
+ Decorator module, see
+ https://github.com/micheles/decorator/blob/master/docs/documentation.md
+ for the documentation.
+ """
+ import re
+ import sys
+ import inspect
+ import operator
+ import itertools
+ import functools
+ from contextlib import _GeneratorContextManager
+ from inspect import getfullargspec, iscoroutinefunction, isgeneratorfunction
+
+ __version__ = '5.2.1'
+
+ DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(')
+ POS = inspect.Parameter.POSITIONAL_OR_KEYWORD
+ EMPTY = inspect.Parameter.empty
+
+
+ # this is not used anymore in the core, but kept for backward compatibility
+ class FunctionMaker(object):
+     """
+     An object with the ability to create functions with a given signature.
+     It has attributes name, doc, module, signature, defaults, dict and
+     methods update and make.
+     """
+
+     # Atomic get-and-increment provided by the GIL
+     _compile_count = itertools.count()
+
+     # make pylint happy
+     args = varargs = varkw = defaults = kwonlyargs = kwonlydefaults = ()
+
+     def __init__(self, func=None, name=None, signature=None,
+                  defaults=None, doc=None, module=None, funcdict=None):
+         self.shortsignature = signature
+         if func:
+             # func can be a class or a callable, but not an instance method
+             self.name = func.__name__
+             if self.name == '<lambda>':  # small hack for lambda functions
+                 self.name = '_lambda_'
+             self.doc = func.__doc__
+             self.module = func.__module__
+             if inspect.isroutine(func) or isinstance(func, functools.partial):
+                 argspec = getfullargspec(func)
+                 self.annotations = getattr(func, '__annotations__', {})
+                 for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs',
+                           'kwonlydefaults'):
+                     setattr(self, a, getattr(argspec, a))
+                 for i, arg in enumerate(self.args):
+                     setattr(self, 'arg%d' % i, arg)
+                 allargs = list(self.args)
+                 allshortargs = list(self.args)
+                 if self.varargs:
+                     allargs.append('*' + self.varargs)
+                     allshortargs.append('*' + self.varargs)
+                 elif self.kwonlyargs:
+                     allargs.append('*')  # single star syntax
+                 for a in self.kwonlyargs:
+                     allargs.append('%s=None' % a)
+                     allshortargs.append('%s=%s' % (a, a))
+                 if self.varkw:
+                     allargs.append('**' + self.varkw)
+                     allshortargs.append('**' + self.varkw)
+                 self.signature = ', '.join(allargs)
+                 self.shortsignature = ', '.join(allshortargs)
+                 self.dict = func.__dict__.copy()
+         # func=None happens when decorating a caller
+         if name:
+             self.name = name
+         if signature is not None:
+             self.signature = signature
+         if defaults:
+             self.defaults = defaults
+         if doc:
+             self.doc = doc
+         if module:
+             self.module = module
+         if funcdict:
+             self.dict = funcdict
+         # check existence required attributes
+         assert hasattr(self, 'name')
+         if not hasattr(self, 'signature'):
+             raise TypeError('You are decorating a non function: %s' % func)
+
+     def update(self, func, **kw):
+         """
+         Update the signature of func with the data in self
+         """
+         func.__name__ = self.name
+         func.__doc__ = getattr(self, 'doc', None)
+         func.__dict__ = getattr(self, 'dict', {})
+         func.__defaults__ = self.defaults
+         func.__kwdefaults__ = self.kwonlydefaults or None
+         func.__annotations__ = getattr(self, 'annotations', None)
+         try:
+             frame = sys._getframe(3)
+         except AttributeError:  # for IronPython and similar implementations
+             callermodule = '?'
+         else:
+             callermodule = frame.f_globals.get('__name__', '?')
+         func.__module__ = getattr(self, 'module', callermodule)
+         func.__dict__.update(kw)
+
+     def make(self, src_templ, evaldict=None, addsource=False, **attrs):
+         """
+         Make a new function from a given template and update the signature
+         """
+         src = src_templ % vars(self)  # expand name and signature
+         evaldict = evaldict or {}
+         mo = DEF.search(src)
+         if mo is None:
+             raise SyntaxError('not a valid function template\n%s' % src)
+         name = mo.group(1)  # extract the function name
+         names = set([name] + [arg.strip(' *') for arg in
+                               self.shortsignature.split(',')])
+         for n in names:
+             if n in ('_func_', '_call_'):
+                 raise NameError('%s is overridden in\n%s' % (n, src))
+
+         if not src.endswith('\n'):  # add a newline for old Pythons
+             src += '\n'
+
+         # Ensure each generated function has a unique filename for profilers
+         # (such as cProfile) that depend on the tuple of (<filename>,
+         # <definition line>, <function name>) being unique.
+         filename = '<decorator-gen-%d>' % next(self._compile_count)
+         try:
+             code = compile(src, filename, 'single')
+             exec(code, evaldict)
+         except Exception:
+             print('Error in generated code:', file=sys.stderr)
+             print(src, file=sys.stderr)
+             raise
+         func = evaldict[name]
+         if addsource:
+             attrs['__source__'] = src
+         self.update(func, **attrs)
+         return func
+
+     @classmethod
+     def create(cls, obj, body, evaldict, defaults=None,
+                doc=None, module=None, addsource=True, **attrs):
+         """
+         Create a function from the strings name, signature and body.
+         evaldict is the evaluation dictionary. If addsource is true an
+         attribute __source__ is added to the result. The attributes attrs
+         are added, if any.
+         """
+         if isinstance(obj, str):  # "name(signature)"
+             name, rest = obj.strip().split('(', 1)
+             signature = rest[:-1]  # strip a right parens
+             func = None
+         else:  # a function
+             name = None
+             signature = None
+             func = obj
+         self = cls(func, name, signature, defaults, doc, module)
+         ibody = '\n'.join('    ' + line for line in body.splitlines())
+         caller = evaldict.get('_call_')  # when called from `decorate`
+         if caller and iscoroutinefunction(caller):
+             body = ('async def %(name)s(%(signature)s):\n' + ibody).replace(
+                 'return', 'return await')
+         else:
+             body = 'def %(name)s(%(signature)s):\n' + ibody
+         return self.make(body, evaldict, addsource, **attrs)
+
+
+ def fix(args, kwargs, sig):
+     """
+     Fix args and kwargs to be consistent with the signature
+     """
+     ba = sig.bind(*args, **kwargs)
+     ba.apply_defaults()  # needed for test_dan_schult
+     return ba.args, ba.kwargs
+
+
+ def decorate(func, caller, extras=(), kwsyntax=False):
+     """
+     Decorates a function/generator/coroutine using a caller.
+     If kwsyntax is True calling the decorated functions with keyword
+     syntax will pass the named arguments inside the ``kw`` dictionary,
+     even if such argument are positional, similarly to what functools.wraps
+     does. By default kwsyntax is False and the the arguments are untouched.
+     """
+     sig = inspect.signature(func)
+     if isinstance(func, functools.partial):
+         func = functools.update_wrapper(func, func.func)
+     if iscoroutinefunction(caller):
+         async def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             return await caller(func, *(extras + args), **kw)
+     elif isgeneratorfunction(caller):
+         def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             for res in caller(func, *(extras + args), **kw):
+                 yield res
+     else:
+         def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             return caller(func, *(extras + args), **kw)
+
+     fun.__name__ = func.__name__
+     fun.__doc__ = func.__doc__
+     fun.__wrapped__ = func
+     fun.__signature__ = sig
+     fun.__qualname__ = func.__qualname__
+     # builtin functions like defaultdict.__setitem__ lack many attributes
+     try:
+         fun.__defaults__ = func.__defaults__
+     except AttributeError:
+         pass
+     try:
+         fun.__kwdefaults__ = func.__kwdefaults__
+     except AttributeError:
+         pass
+     try:
+         fun.__annotations__ = func.__annotations__
+     except AttributeError:
+         pass
+     try:
+         fun.__module__ = func.__module__
+     except AttributeError:
+         pass
+     try:
+         fun.__name__ = func.__name__
+     except AttributeError:  # happens with old versions of numpy.vectorize
+         func.__name__ == 'noname'
+     try:
+         fun.__dict__.update(func.__dict__)
+     except AttributeError:
+         pass
+     return fun
+
+
+ def decoratorx(caller):
+     """
+     A version of "decorator" implemented via "exec" and not via the
+     Signature object. Use this if you are want to preserve the `.__code__`
+     object properties (https://github.com/micheles/decorator/issues/129).
+     """
+     def dec(func):
+         return FunctionMaker.create(
+             func,
+             "return _call_(_func_, %(shortsignature)s)",
+             dict(_call_=caller, _func_=func),
+             __wrapped__=func, __qualname__=func.__qualname__)
+     return dec
+
+
+ def decorator(caller, _func=None, kwsyntax=False):
+     """
+     decorator(caller) converts a caller function into a decorator
+     """
+     if _func is not None:  # return a decorated function
+         # this is obsolete behavior; you should use decorate instead
+         return decorate(_func, caller, (), kwsyntax)
+     # else return a decorator function
+     sig = inspect.signature(caller)
+     dec_params = [p for p in sig.parameters.values() if p.kind is POS]
+
+     def dec(func=None, *args, **kw):
+         na = len(args) + 1
+         extras = args + tuple(kw.get(p.name, p.default)
+                               for p in dec_params[na:]
+                               if p.default is not EMPTY)
+         if func is None:
+             return lambda func: decorate(func, caller, extras, kwsyntax)
+         else:
+             return decorate(func, caller, extras, kwsyntax)
+     dec.__signature__ = sig.replace(parameters=dec_params)
+     dec.__name__ = caller.__name__
+     dec.__doc__ = caller.__doc__
+     dec.__wrapped__ = caller
+     dec.__qualname__ = caller.__qualname__
+     dec.__kwdefaults__ = getattr(caller, '__kwdefaults__', None)
+     dec.__dict__.update(caller.__dict__)
+     return dec
+
+
+ # ####################### contextmanager ####################### #
+
+
+ class ContextManager(_GeneratorContextManager):
+     def __init__(self, g, *a, **k):
+         _GeneratorContextManager.__init__(self, g, a, k)
+
+     def __call__(self, func):
+         def caller(f, *a, **k):
+             with self.__class__(self.func, *self.args, **self.kwds):
+                 return f(*a, **k)
+         return decorate(func, caller)
+
+
+ _contextmanager = decorator(ContextManager)
+
+
+ def contextmanager(func):
+     # Enable Pylint config: contextmanager-decorators=decorator.contextmanager
+     return _contextmanager(func)
+
+
+ # ############################ dispatch_on ############################ #
+
+ def append(a, vancestors):
+     """
+     Append ``a`` to the list of the virtual ancestors, unless it is already
+     included.
+     """
+     add = True
+     for j, va in enumerate(vancestors):
+         if issubclass(va, a):
+             add = False
+             break
+         if issubclass(a, va):
+             vancestors[j] = a
+             add = False
+     if add:
+         vancestors.append(a)
+
+
+ # inspired from simplegeneric by P.J. Eby and functools.singledispatch
+ def dispatch_on(*dispatch_args):
+     """
+     Factory of decorators turning a function into a generic function
+     dispatching on the given arguments.
+     """
+     assert dispatch_args, 'No dispatch args passed'
+     dispatch_str = '(%s,)' % ', '.join(dispatch_args)
+
+     def check(arguments, wrong=operator.ne, msg=''):
+         """Make sure one passes the expected number of arguments"""
+         if wrong(len(arguments), len(dispatch_args)):
+             raise TypeError('Expected %d arguments, got %d%s' %
+                             (len(dispatch_args), len(arguments), msg))
+
+     def gen_func_dec(func):
+         """Decorator turning a function into a generic function"""
+
+         # first check the dispatch arguments
+         argset = set(getfullargspec(func).args)
+         if not set(dispatch_args) <= argset:
+             raise NameError('Unknown dispatch arguments %s' % dispatch_str)
+
+         typemap = {}
+
+         def vancestors(*types):
+             """
+             Get a list of sets of virtual ancestors for the given types
+             """
+             check(types)
+             ras = [[] for _ in range(len(dispatch_args))]
+             for types_ in typemap:
+                 for t, type_, ra in zip(types, types_, ras):
+                     if issubclass(t, type_) and type_ not in t.mro():
+                         append(type_, ra)
+             return [set(ra) for ra in ras]
+
+         def ancestors(*types):
+             """
+             Get a list of virtual MROs, one for each type
+             """
+             check(types)
+             lists = []
+             for t, vas in zip(types, vancestors(*types)):
+                 n_vas = len(vas)
+                 if n_vas > 1:
+                     raise RuntimeError(
+                         'Ambiguous dispatch for %s: %s' % (t, vas))
+                 elif n_vas == 1:
+                     va, = vas
+                     mro = type('t', (t, va), {}).mro()[1:]
+                 else:
+                     mro = t.mro()
+                 lists.append(mro[:-1])  # discard t and object
+             return lists
+
+         def register(*types):
+             """
+             Decorator to register an implementation for the given types
+             """
+             check(types)
+
+             def dec(f):
+                 check(getfullargspec(f).args, operator.lt, ' in ' + f.__name__)
+                 typemap[types] = f
+                 return f
+             return dec
+
+         def dispatch_info(*types):
+             """
+             An utility to introspect the dispatch algorithm
+             """
+             check(types)
+             lst = []
+             for ancs in itertools.product(*ancestors(*types)):
+                 lst.append(tuple(a.__name__ for a in ancs))
+             return lst
+
+         def _dispatch(dispatch_args, *args, **kw):
+             types = tuple(type(arg) for arg in dispatch_args)
+             try:  # fast path
+                 f = typemap[types]
+             except KeyError:
+                 pass
+             else:
+                 return f(*args, **kw)
+             combinations = itertools.product(*ancestors(*types))
+             next(combinations)  # the first one has been already tried
+             for types_ in combinations:
+                 f = typemap.get(types_)
+                 if f is not None:
+                     return f(*args, **kw)
+
+             # else call the default implementation
+             return func(*args, **kw)
+
+         return FunctionMaker.create(
+             func, 'return _f_(%s, %%(shortsignature)s)' % dispatch_str,
+             dict(_f_=_dispatch), register=register, default=func,
+             typemap=typemap, vancestors=vancestors, ancestors=ancestors,
+             dispatch_info=dispatch_info, __wrapped__=func)
+
+     gen_func_dec.__name__ = 'dispatch_on' + dispatch_str
+     return gen_func_dec
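
For context, a minimal sketch of the module's main entry point, decorator(caller), which the docstring above points to; trace and add are hypothetical names. Unlike a plain *args/**kw closure, a wrapper built this way preserves the wrapped function's signature:

    import inspect
    from decorator import decorator

    @decorator
    def trace(func, *args, **kw):
        print("calling %s with args %s, %s" % (func.__name__, args, kw))
        return func(*args, **kw)

    @trace
    def add(x, y):
        return x + y

    print(add(1, 2))               # prints the trace line, then 3
    print(inspect.signature(add))  # (x, y) -- the signature is preserved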
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth ADDED
@@ -0,0 +1 @@
+ import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py ADDED
@@ -0,0 +1,18 @@
+ """Entry point for launching an IPython kernel.
+
+ This is separate from the ipykernel package so we can avoid doing imports until
+ after removing the cwd from sys.path.
+ """
+
+ import sys
+ from pathlib import Path
+
+ if __name__ == "__main__":
+     # Remove the CWD from sys.path while we load stuff.
+     # This is added back by InteractiveShellApp.init_path()
+     if sys.path[0] == "" or Path(sys.path[0]) == Path.cwd():
+         del sys.path[0]
+
+     from ipykernel import kernelapp as app
+
+     app.launch_new_instance()
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py ADDED
@@ -0,0 +1,342 @@
+ """
+ Python shell for SymPy.
+
+ This is just a normal Python shell (IPython shell if you have the
+ IPython package installed), that executes the following commands for
+ the user:
+
+ >>> from __future__ import division
+ >>> from sympy import *
+ >>> x, y, z, t = symbols('x y z t')
+ >>> k, m, n = symbols('k m n', integer=True)
+ >>> f, g, h = symbols('f g h', cls=Function)
+ >>> init_printing()
+
+ So starting 'isympy' is equivalent to starting Python (or IPython) and
+ executing the above commands by hand. It is intended for easy and quick
+ experimentation with SymPy. isympy is a good way to use SymPy as an
+ interactive calculator. If you have IPython and Matplotlib installed, then
+ interactive plotting is enabled by default.
+
+ COMMAND LINE OPTIONS
+ --------------------
+
+ -c CONSOLE, --console=CONSOLE
+
+     Use the specified shell (Python or IPython) as the console
+     backend instead of the default one (IPython if present, Python
+     otherwise), e.g.:
+
+         $isympy -c python
+
+     CONSOLE must be one of 'ipython' or 'python'
+
+ -p PRETTY, --pretty PRETTY
+
+     Setup pretty-printing in SymPy. When pretty-printing is enabled,
+     expressions can be printed with Unicode or ASCII. The default is
+     to use pretty-printing (with Unicode if the terminal supports it).
+     When this option is 'no', expressions will not be pretty-printed
+     and ASCII will be used:
+
+         $isympy -p no
+
+     PRETTY must be one of 'unicode', 'ascii', or 'no'
+
+ -t TYPES, --types=TYPES
+
+     Setup the ground types for the polys. By default, gmpy ground types
+     are used if gmpy2 or gmpy is installed, otherwise it falls back to python
+     ground types, which are a little bit slower. You can manually
+     choose python ground types even if gmpy is installed (e.g., for
+     testing purposes):
+
+         $isympy -t python
+
+     TYPES must be one of 'gmpy', 'gmpy1' or 'python'
+
+     Note that the ground type gmpy1 is primarily intended for testing; it
+     forces the use of gmpy version 1 even if gmpy2 is available.
+
+     This is the same as setting the environment variable
+     SYMPY_GROUND_TYPES to the given ground type (e.g.,
+     SYMPY_GROUND_TYPES='gmpy')
+
+     The ground types can be determined interactively from the variable
+     sympy.polys.domains.GROUND_TYPES.
+
+ -o ORDER, --order ORDER
+
+     Setup the ordering of terms for printing. The default is lex, which
+     orders terms lexicographically (e.g., x**2 + x + 1). You can choose
+     other orderings, such as rev-lex, which will use reverse
+     lexicographic ordering (e.g., 1 + x + x**2):
+
+         $isympy -o rev-lex
+
+     ORDER must be one of 'lex', 'rev-lex', 'grlex', 'rev-grlex',
+     'grevlex', 'rev-grevlex', 'old', or 'none'.
+
+     Note that for very large expressions, ORDER='none' may speed up
+     printing considerably but the terms will have no canonical order.
+
+ -q, --quiet
+
+     Print only Python's and SymPy's versions to stdout at startup.
+
+ -d, --doctest
+
+     Use the same format that should be used for doctests. This is
+     equivalent to -c python -p no.
+
+ -C, --no-cache
+
+     Disable the caching mechanism. Disabling the cache may slow certain
+     operations down considerably. This is useful for testing the cache,
+     or for benchmarking, as the cache can result in deceptive timings.
+
+     This is equivalent to setting the environment variable
+     SYMPY_USE_CACHE to 'no'.
+
+ -a, --auto-symbols (requires at least IPython 0.11)
+
+     Automatically create missing symbols. Normally, typing a name of a
+     Symbol that has not been instantiated first would raise NameError,
+     but with this option enabled, any undefined name will be
+     automatically created as a Symbol.
+
+     Note that this is intended only for interactive, calculator style
+     usage. In a script that uses SymPy, Symbols should be instantiated
+     at the top, so that it's clear what they are.
+
+     This will not override any names that are already defined, which
+     includes the single character letters represented by the mnemonic
+     QCOSINE (see the "Gotchas and Pitfalls" document in the
+     documentation). You can delete existing names by executing "del
+     name". If a name is defined, typing "'name' in dir()" will return True.
+
+     The Symbols that are created using this have default assumptions.
+     If you want to place assumptions on symbols, you should create them
+     using symbols() or var().
+
+     Finally, this only works in the top level namespace. So, for
+     example, if you define a function in isympy with an undefined
+     Symbol, it will not work.
+
+     See also the -i and -I options.
+
+ -i, --int-to-Integer (requires at least IPython 0.11)
+
+     Automatically wrap int literals with Integer. This makes it so that
+     things like 1/2 will come out as Rational(1, 2), rather than 0.5. This
+     works by preprocessing the source and wrapping all int literals with
+     Integer. Note that this will not change the behavior of int literals
+     assigned to variables, and it also won't change the behavior of functions
+     that return int literals.
+
+     If you want an int, you can wrap the literal in int(), e.g. int(3)/int(2)
+     gives 1.5 (with division imported from __future__).
+
+ -I, --interactive (requires at least IPython 0.11)
+
+     This is equivalent to --auto-symbols --int-to-Integer. Future options
+     designed for ease of interactive use may be added to this.
+
+ -D, --debug
+
+     Enable debugging output. This is the same as setting the
+     environment variable SYMPY_DEBUG to 'True'. The debug status is set
+     in the variable SYMPY_DEBUG within isympy.
+
+ -- IPython options
+
+     Additionally you can pass command line options directly to the IPython
+     interpreter (the standard Python shell is not supported). However, you
+     need to add the '--' separator between the two types of options, e.g. the
+     startup banner option and the colors option. You need to enter the
+     options as required by the version of IPython that you are using, too:
+
+     in IPython 0.11,
+
+         $isympy -q -- --colors=NoColor
+
+     or in older versions of IPython,
+
+         $isympy -q -- -colors NoColor
+
+ See also isympy --help.
+ """
+
+ import os
+ import sys
+
+ # DO NOT IMPORT SYMPY HERE! Or the setting of the sympy environment variables
+ # by the command line will break.
+
+ def main() -> None:
+     from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+     VERSION = None
+     if '--version' in sys.argv:
+         # We cannot import sympy before this is run, because flags like -C and
+         # -t set environment variables that must be set before SymPy is
+         # imported. The only thing we need to import it for is to get the
+         # version, which only matters with the --version flag.
+         import sympy
+         VERSION = sympy.__version__
+
+     usage = 'isympy [options] -- [ipython options]'
+     parser = ArgumentParser(
+         usage=usage,
+         description=__doc__,
+         formatter_class=RawDescriptionHelpFormatter,
+     )
+
+     parser.add_argument('--version', action='version', version=VERSION)
+
+     parser.add_argument(
+         '-c', '--console',
+         dest='console',
+         action='store',
+         default=None,
+         choices=['ipython', 'python'],
+         metavar='CONSOLE',
+         help='select type of interactive session: ipython | python; defaults '
+         'to ipython if IPython is installed, otherwise python')
+
+     parser.add_argument(
+         '-p', '--pretty',
+         dest='pretty',
+         action='store',
+         default=None,
+         metavar='PRETTY',
+         choices=['unicode', 'ascii', 'no'],
+         help='setup pretty printing: unicode | ascii | no; defaults to '
+         'unicode printing if the terminal supports it, otherwise ascii')
+
+     parser.add_argument(
+         '-t', '--types',
+         dest='types',
+         action='store',
+         default=None,
+         metavar='TYPES',
+         choices=['gmpy', 'gmpy1', 'python'],
+         help='setup ground types: gmpy | gmpy1 | python; defaults to gmpy if gmpy2 '
+         'or gmpy is installed, otherwise python')
+
+     parser.add_argument(
+         '-o', '--order',
+         dest='order',
+         action='store',
+         default=None,
+         metavar='ORDER',
+         choices=['lex', 'grlex', 'grevlex', 'rev-lex', 'rev-grlex', 'rev-grevlex', 'old', 'none'],
+         help='setup ordering of terms: [rev-]lex | [rev-]grlex | [rev-]grevlex | old | none; defaults to lex')
+
+     parser.add_argument(
+         '-q', '--quiet',
+         dest='quiet',
+         action='store_true',
+         default=False,
+         help='print only version information at startup')
+
+     parser.add_argument(
+         '-d', '--doctest',
+         dest='doctest',
+         action='store_true',
+         default=False,
+         help='use the doctest format for output (you can just copy and paste it)')
+
+     parser.add_argument(
+         '-C', '--no-cache',
+         dest='cache',
+         action='store_false',
+         default=True,
+         help='disable caching mechanism')
+
+     parser.add_argument(
+         '-a', '--auto-symbols',
+         dest='auto_symbols',
+         action='store_true',
+         default=False,
+         help='automatically construct missing symbols')
+
+     parser.add_argument(
+         '-i', '--int-to-Integer',
+         dest='auto_int_to_Integer',
+         action='store_true',
+         default=False,
+         help="automatically wrap int literals with Integer")
+
+     parser.add_argument(
+         '-I', '--interactive',
+         dest='interactive',
+         action='store_true',
+         default=False,
+         help="equivalent to -a -i")
+
+     parser.add_argument(
+         '-D', '--debug',
+         dest='debug',
+         action='store_true',
+         default=False,
+         help='enable debugging output')
+
+     (options, ipy_args) = parser.parse_known_args()
+     if '--' in ipy_args:
+         ipy_args.remove('--')
+
+     if not options.cache:
+         os.environ['SYMPY_USE_CACHE'] = 'no'
+
+     if options.types:
+         os.environ['SYMPY_GROUND_TYPES'] = options.types
+
+     if options.debug:
+         os.environ['SYMPY_DEBUG'] = str(options.debug)
+
+     if options.doctest:
+         options.pretty = 'no'
+         options.console = 'python'
+
+     session = options.console
+
+     if session is not None:
+         ipython = session == 'ipython'
+     else:
+         try:
+             import IPython
+             ipython = True
+         except ImportError:
+             if not options.quiet:
+                 from sympy.interactive.session import no_ipython
+                 print(no_ipython)
+             ipython = False
+
+     args = {
+         'pretty_print': True,
+         'use_unicode': None,
+         'use_latex': None,
+         'order': None,
+         'argv': ipy_args,
+     }
+
+     if options.pretty == 'unicode':
+         args['use_unicode'] = True
+     elif options.pretty == 'ascii':
+         args['use_unicode'] = False
+     elif options.pretty == 'no':
+         args['pretty_print'] = False
+
+     if options.order is not None:
+         args['order'] = options.order
+
+     args['quiet'] = options.quiet
+     args['auto_symbols'] = options.auto_symbols or options.interactive
+     args['auto_int_to_Integer'] = options.auto_int_to_Integer or options.interactive
+
+     from sympy.interactive import init_session
+     init_session(ipython, **args)
+
+ if __name__ == "__main__":
+     main()
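Note: per the docstring above, a typical invocation is `isympy -p no -t python -- --colors=NoColor`. The same session can also be started programmatically; a sketch using the `init_session` call that `main()` ends with (keyword names as used above; assumes SymPy is installed):

from sympy.interactive import init_session
init_session(ipython=False, pretty_print=False, argv=[])  # roughly `isympy -c python -p no`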
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py ADDED
@@ -0,0 +1,348 @@
+ # -*- coding: utf-8 -*-
+ #
+ # python-json-pointer - An implementation of the JSON Pointer syntax
+ # https://github.com/stefankoegl/python-json-pointer
+ #
+ # Copyright (c) 2011 Stefan Kögl <[email protected]>
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions
+ # are met:
+ #
+ # 1. Redistributions of source code must retain the above copyright
+ #    notice, this list of conditions and the following disclaimer.
+ # 2. Redistributions in binary form must reproduce the above copyright
+ #    notice, this list of conditions and the following disclaimer in the
+ #    documentation and/or other materials provided with the distribution.
+ # 3. The name of the author may not be used to endorse or promote products
+ #    derived from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+
+ """ Identify specific nodes in a JSON document (RFC 6901) """
+
+ # Will be parsed by setup.py to determine package metadata
+ __author__ = 'Stefan Kögl <[email protected]>'
+ __version__ = '3.0.0'
+ __website__ = 'https://github.com/stefankoegl/python-json-pointer'
+ __license__ = 'Modified BSD License'
+
+ import copy
+ import re
+ from collections.abc import Mapping, Sequence
+ from itertools import tee, chain
+
+ _nothing = object()
+
+
+ def set_pointer(doc, pointer, value, inplace=True):
+     """Resolves a pointer against doc and sets the value of the target within doc.
+
+     With inplace set to true, doc is modified as long as pointer is not the
+     root.
+
+     >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}}
+
+     >>> set_pointer(obj, '/foo/anArray/0/prop', 55) == \
+     {'foo': {'another prop': {'baz': 'A string'}, 'anArray': [{'prop': 55}]}}
+     True
+
+     >>> set_pointer(obj, '/foo/yet another prop', 'added prop') == \
+     {'foo': {'another prop': {'baz': 'A string'}, 'yet another prop': 'added prop', 'anArray': [{'prop': 55}]}}
+     True
+
+     >>> obj = {'foo': {}}
+     >>> set_pointer(obj, '/foo/a%20b', 'x') == \
+     {'foo': {'a%20b': 'x' }}
+     True
+     """
+
+     pointer = JsonPointer(pointer)
+     return pointer.set(doc, value, inplace)
+
+
+ def resolve_pointer(doc, pointer, default=_nothing):
+     """ Resolves pointer against doc and returns the referenced object
+
+     >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}, 'a%20b': 1, 'c d': 2}
+
+     >>> resolve_pointer(obj, '') == obj
+     True
+
+     >>> resolve_pointer(obj, '/foo') == obj['foo']
+     True
+
+     >>> resolve_pointer(obj, '/foo/another prop') == obj['foo']['another prop']
+     True
+
+     >>> resolve_pointer(obj, '/foo/another prop/baz') == obj['foo']['another prop']['baz']
+     True
+
+     >>> resolve_pointer(obj, '/foo/anArray/0') == obj['foo']['anArray'][0]
+     True
+
+     >>> resolve_pointer(obj, '/some/path', None) == None
+     True
+
+     >>> resolve_pointer(obj, '/a b', None) == None
+     True
+
+     >>> resolve_pointer(obj, '/a%20b') == 1
+     True
+
+     >>> resolve_pointer(obj, '/c d') == 2
+     True
+
+     >>> resolve_pointer(obj, '/c%20d', None) == None
+     True
+     """
+
+     pointer = JsonPointer(pointer)
+     return pointer.resolve(doc, default)
+
+
+ def pairwise(iterable):
+     """ Transforms a list to a list of tuples of adjacent items
+
+     s -> (s0,s1), (s1,s2), (s2, s3), ...
+
+     >>> list(pairwise([]))
+     []
+
+     >>> list(pairwise([1]))
+     []
+
+     >>> list(pairwise([1, 2, 3, 4]))
+     [(1, 2), (2, 3), (3, 4)]
+     """
+     a, b = tee(iterable)
+     for _ in b:
+         break
+     return zip(a, b)
+
+
+ class JsonPointerException(Exception):
+     pass
+
+
+ class EndOfList(object):
+     """Result of accessing element "-" of a list"""
+
+     def __init__(self, list_):
+         self.list_ = list_
+
+     def __repr__(self):
+         return '{cls}({lst})'.format(cls=self.__class__.__name__,
+                                      lst=repr(self.list_))
+
+
+ class JsonPointer(object):
+     """A JSON Pointer that can reference parts of a JSON document"""
+
+     # Array indices must not contain:
+     # leading zeros, signs, spaces, decimals, etc
+     _RE_ARRAY_INDEX = re.compile('0|[1-9][0-9]*$')
+     _RE_INVALID_ESCAPE = re.compile('(~[^01]|~$)')
+
+     def __init__(self, pointer):
+
+         # validate escapes
+         invalid_escape = self._RE_INVALID_ESCAPE.search(pointer)
+         if invalid_escape:
+             raise JsonPointerException('Found invalid escape {}'.format(
+                 invalid_escape.group()))
+
+         parts = pointer.split('/')
+         if parts.pop(0) != '':
+             raise JsonPointerException('Location must start with /')
+
+         parts = [unescape(part) for part in parts]
+         self.parts = parts
+
+     def to_last(self, doc):
+         """Resolves ptr until the last step, returns (sub-doc, last-step)"""
+
+         if not self.parts:
+             return doc, None
+
+         for part in self.parts[:-1]:
+             doc = self.walk(doc, part)
+
+         return doc, JsonPointer.get_part(doc, self.parts[-1])
+
+     def resolve(self, doc, default=_nothing):
+         """Resolves the pointer against doc and returns the referenced object"""
+
+         for part in self.parts:
+
+             try:
+                 doc = self.walk(doc, part)
+             except JsonPointerException:
+                 if default is _nothing:
+                     raise
+                 else:
+                     return default
+
+         return doc
+
+     get = resolve
+
+     def set(self, doc, value, inplace=True):
+         """Resolve the pointer against the doc and replace the target with value."""
+
+         if len(self.parts) == 0:
+             if inplace:
+                 raise JsonPointerException('Cannot set root in place')
+             return value
+
+         if not inplace:
+             doc = copy.deepcopy(doc)
+
+         (parent, part) = self.to_last(doc)
+
+         if isinstance(parent, Sequence) and part == '-':
+             parent.append(value)
+         else:
+             parent[part] = value
+
+         return doc
+
+     @classmethod
+     def get_part(cls, doc, part):
+         """Returns the next step in the correct type"""
+
+         if isinstance(doc, Mapping):
+             return part
+
+         elif isinstance(doc, Sequence):
+
+             if part == '-':
+                 return part
+
+             if not JsonPointer._RE_ARRAY_INDEX.match(str(part)):
+                 raise JsonPointerException("'%s' is not a valid sequence index" % part)
+
+             return int(part)
+
+         elif hasattr(doc, '__getitem__'):
+             # Allow indexing via ducktyping
+             # if the target has defined __getitem__
+             return part
+
+         else:
+             raise JsonPointerException("Document '%s' does not support indexing, "
+                                        "must be mapping/sequence or support __getitem__" % type(doc))
+
+     def get_parts(self):
+         """Returns the list of the parts. For example, JsonPointer('/a/b').get_parts() == ['a', 'b']"""
+
+         return self.parts
+
+     def walk(self, doc, part):
+         """ Walks one step in doc and returns the referenced part """
+
+         part = JsonPointer.get_part(doc, part)
+
+         assert hasattr(doc, '__getitem__'), "invalid document type %s" % (type(doc),)
+
+         if isinstance(doc, Sequence):
+             if part == '-':
+                 return EndOfList(doc)
+
+             try:
+                 return doc[part]
+
+             except IndexError:
+                 raise JsonPointerException("index '%s' is out of bounds" % (part,))
+
+         # Else the object is a mapping or supports __getitem__
+         # (so assume custom indexing)
+         try:
+             return doc[part]
+
+         except KeyError:
+             raise JsonPointerException("member '%s' not found in %s" % (part, doc))
+
+     def contains(self, ptr):
+         """ Returns True if self contains the given ptr """
+         return self.parts[:len(ptr.parts)] == ptr.parts
+
+     def __contains__(self, item):
+         """ Returns True if self contains the given ptr """
+         return self.contains(item)
+
+     def join(self, suffix):
+         """ Returns a new JsonPointer with the given suffix appended to this ptr """
+         if isinstance(suffix, JsonPointer):
+             suffix_parts = suffix.parts
+         elif isinstance(suffix, str):
+             suffix_parts = JsonPointer(suffix).parts
+         else:
+             suffix_parts = suffix
+         try:
+             return JsonPointer.from_parts(chain(self.parts, suffix_parts))
+         except:  # noqa E722
+             raise JsonPointerException("Invalid suffix")
+
+     def __truediv__(self, suffix):  # Python 3
+         return self.join(suffix)
+
+     @property
+     def path(self):
+         """Returns the string representation of the pointer
+
+         >>> ptr = JsonPointer('/~0/0/~1').path == '/~0/0/~1'
+         """
+         parts = [escape(part) for part in self.parts]
+         return ''.join('/' + part for part in parts)
+
+     def __eq__(self, other):
+         """Compares a pointer to another object
+
+         Pointers can be compared by comparing their strings (or split
+         strings), because no two different parts can point to the same
+         structure in an object (e.g. no different number representations)
+         """
+
+         if not isinstance(other, JsonPointer):
+             return False
+
+         return self.parts == other.parts
+
+     def __hash__(self):
+         return hash(tuple(self.parts))
+
+     def __str__(self):
+         return self.path
+
+     def __repr__(self):
+         return type(self).__name__ + "(" + repr(self.path) + ")"
+
+     @classmethod
+     def from_parts(cls, parts):
+         """Constructs a JsonPointer from a list of (unescaped) paths
+
+         >>> JsonPointer.from_parts(['a', '~', '/', 0]).path == '/a/~0/~1/0'
+         True
+         """
+         parts = [escape(str(part)) for part in parts]
+         ptr = cls(''.join('/' + part for part in parts))
+         return ptr
+
+
+ def escape(s):
+     return s.replace('~', '~0').replace('/', '~1')
+
+
+ def unescape(s):
+     return s.replace('~1', '/').replace('~0', '~')
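Note: a short usage sketch of the API above, following its own doctests (document contents illustrative):

from jsonpointer import JsonPointer, resolve_pointer, set_pointer

doc = {'users': [{'name': 'ada'}]}
assert resolve_pointer(doc, '/users/0/name') == 'ada'

ptr = JsonPointer('/users/0/name')
assert ptr.resolve(doc) == 'ada'

set_pointer(doc, '/users/0/name', 'grace')  # modifies doc in place by default
assert doc['users'][0]['name'] == 'grace'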
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py ADDED
@@ -0,0 +1,7 @@
+ """Launch the root jupyter command"""
+ from __future__ import annotations
+
+ if __name__ == "__main__":
+     from jupyter_core.command import main
+
+     main()
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py ADDED
@@ -0,0 +1,219 @@
+ """Patch asyncio to allow nested event loops."""
+
+ import asyncio
+ import asyncio.events as events
+ import os
+ import sys
+ import threading
+ from contextlib import contextmanager, suppress
+ from heapq import heappop
+
+
+ def apply(loop=None):
+     """Patch asyncio to make its event loop reentrant."""
+     _patch_asyncio()
+     _patch_policy()
+     _patch_tornado()
+
+     loop = loop or asyncio.get_event_loop()
+     _patch_loop(loop)
+
+
+ def _patch_asyncio():
+     """Patch asyncio module to use pure Python tasks and futures."""
+
+     def run(main, *, debug=False):
+         loop = asyncio.get_event_loop()
+         loop.set_debug(debug)
+         task = asyncio.ensure_future(main)
+         try:
+             return loop.run_until_complete(task)
+         finally:
+             if not task.done():
+                 task.cancel()
+                 with suppress(asyncio.CancelledError):
+                     loop.run_until_complete(task)
+
+     def _get_event_loop(stacklevel=3):
+         loop = events._get_running_loop()
+         if loop is None:
+             loop = events.get_event_loop_policy().get_event_loop()
+         return loop
+
+     # Use module level _current_tasks, all_tasks and patch run method.
+     if hasattr(asyncio, '_nest_patched'):
+         return
+     if sys.version_info >= (3, 6, 0):
+         asyncio.Task = asyncio.tasks._CTask = asyncio.tasks.Task = \
+             asyncio.tasks._PyTask
+         asyncio.Future = asyncio.futures._CFuture = asyncio.futures.Future = \
+             asyncio.futures._PyFuture
+     if sys.version_info < (3, 7, 0):
+         asyncio.tasks._current_tasks = asyncio.tasks.Task._current_tasks
+         asyncio.all_tasks = asyncio.tasks.Task.all_tasks
+     if sys.version_info >= (3, 9, 0):
+         events._get_event_loop = events.get_event_loop = \
+             asyncio.get_event_loop = _get_event_loop
+     asyncio.run = run
+     asyncio._nest_patched = True
+
+
+ def _patch_policy():
+     """Patch the policy to always return a patched loop."""
+
+     def get_event_loop(self):
+         if self._local._loop is None:
+             loop = self.new_event_loop()
+             _patch_loop(loop)
+             self.set_event_loop(loop)
+         return self._local._loop
+
+     policy = events.get_event_loop_policy()
+     policy.__class__.get_event_loop = get_event_loop
+
+
+ def _patch_loop(loop):
+     """Patch loop to make it reentrant."""
+
+     def run_forever(self):
+         with manage_run(self), manage_asyncgens(self):
+             while True:
+                 self._run_once()
+                 if self._stopping:
+                     break
+             self._stopping = False
+
+     def run_until_complete(self, future):
+         with manage_run(self):
+             f = asyncio.ensure_future(future, loop=self)
+             if f is not future:
+                 f._log_destroy_pending = False
+             while not f.done():
+                 self._run_once()
+                 if self._stopping:
+                     break
+             if not f.done():
+                 raise RuntimeError(
+                     'Event loop stopped before Future completed.')
+             return f.result()
+
+     def _run_once(self):
+         """
+         Simplified re-implementation of asyncio's _run_once that
+         runs handles as they become ready.
+         """
+         ready = self._ready
+         scheduled = self._scheduled
+         while scheduled and scheduled[0]._cancelled:
+             heappop(scheduled)
+
+         timeout = (
+             0 if ready or self._stopping
+             else min(max(
+                 scheduled[0]._when - self.time(), 0), 86400) if scheduled
+             else None)
+         event_list = self._selector.select(timeout)
+         self._process_events(event_list)
+
+         end_time = self.time() + self._clock_resolution
+         while scheduled and scheduled[0]._when < end_time:
+             handle = heappop(scheduled)
+             ready.append(handle)
+
+         for _ in range(len(ready)):
+             if not ready:
+                 break
+             handle = ready.popleft()
+             if not handle._cancelled:
+                 # preempt the current task so that checks in
+                 # Task.__step do not raise
+                 curr_task = curr_tasks.pop(self, None)
+
+                 try:
+                     handle._run()
+                 finally:
+                     # restore the current task
+                     if curr_task is not None:
+                         curr_tasks[self] = curr_task
+
+         handle = None
+
+     @contextmanager
+     def manage_run(self):
+         """Set up the loop for running."""
+         self._check_closed()
+         old_thread_id = self._thread_id
+         old_running_loop = events._get_running_loop()
+         try:
+             self._thread_id = threading.get_ident()
+             events._set_running_loop(self)
+             self._num_runs_pending += 1
+             if self._is_proactorloop:
+                 if self._self_reading_future is None:
+                     self.call_soon(self._loop_self_reading)
+             yield
+         finally:
+             self._thread_id = old_thread_id
+             events._set_running_loop(old_running_loop)
+             self._num_runs_pending -= 1
+             if self._is_proactorloop:
+                 if (self._num_runs_pending == 0
+                         and self._self_reading_future is not None):
+                     ov = self._self_reading_future._ov
+                     self._self_reading_future.cancel()
+                     if ov is not None:
+                         self._proactor._unregister(ov)
+                     self._self_reading_future = None
+
+     @contextmanager
+     def manage_asyncgens(self):
+         if not hasattr(sys, 'get_asyncgen_hooks'):
+             # Python version is too old.
+             return
+         old_agen_hooks = sys.get_asyncgen_hooks()
+         try:
+             self._set_coroutine_origin_tracking(self._debug)
+             if self._asyncgens is not None:
+                 sys.set_asyncgen_hooks(
+                     firstiter=self._asyncgen_firstiter_hook,
+                     finalizer=self._asyncgen_finalizer_hook)
+             yield
+         finally:
+             self._set_coroutine_origin_tracking(False)
+             if self._asyncgens is not None:
+                 sys.set_asyncgen_hooks(*old_agen_hooks)
+
+     def _check_running(self):
+         """Do not throw exception if loop is already running."""
+         pass
+
+     if hasattr(loop, '_nest_patched'):
+         return
+     if not isinstance(loop, asyncio.BaseEventLoop):
+         raise ValueError('Can\'t patch loop of type %s' % type(loop))
+     cls = loop.__class__
+     cls.run_forever = run_forever
+     cls.run_until_complete = run_until_complete
+     cls._run_once = _run_once
+     cls._check_running = _check_running
+     cls._check_runnung = _check_running  # typo in Python 3.7 source
+     cls._num_runs_pending = 1 if loop.is_running() else 0
+     cls._is_proactorloop = (
+         os.name == 'nt' and issubclass(cls, asyncio.ProactorEventLoop))
+     if sys.version_info < (3, 7, 0):
+         cls._set_coroutine_origin_tracking = cls._set_coroutine_wrapper
+     curr_tasks = asyncio.tasks._current_tasks \
+         if sys.version_info >= (3, 7, 0) else asyncio.Task._current_tasks
+     cls._nest_patched = True
+
+
+ def _patch_tornado():
+     """
+     If tornado is imported before nest_asyncio, make tornado aware of
+     the pure-Python asyncio Future.
+     """
+     if 'tornado' in sys.modules:
+         import tornado.concurrent as tc  # type: ignore
+         tc.Future = asyncio.Future
+         if asyncio.Future not in tc.FUTURES:
+             tc.FUTURES += (asyncio.Future,)
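Note: the module's documented use is to make an already-running loop reentrant, e.g. inside a Jupyter cell. A minimal sketch of that pattern:

import asyncio
import nest_asyncio

nest_asyncio.apply()  # patch the current loop to allow nested run calls

async def answer():
    return 42

# Inside an environment with a running loop, asyncio.run() would raise
# RuntimeError without the patch above.
print(asyncio.run(answer()))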
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py ADDED
@@ -0,0 +1,304 @@
+ # Author: John MacFarlane <[email protected]>
+ # Copyright: (C) 2013 John MacFarlane
+ # License: BSD3
+
+ """
+ Functions to aid writing python scripts that process the pandoc
+ AST serialized as JSON.
+ """
+
+ import codecs
+ import hashlib
+ import io
+ import json
+ import os
+ import sys
+ import atexit
+ import shutil
+ import tempfile
+
+
+ # some utility functions: make it easier to create your own filters
+
+
+ def get_filename4code(module, content, ext=None):
+     """Generate filename based on content
+
+     The function ensures that the (temporary) directory exists, so that the
+     file can be written.
+
+     By default, the directory won't be cleaned up,
+     so a filter can use the directory as a cache and
+     decide not to regenerate if there's no change.
+
+     In case the user prefers the files to be temporary files,
+     an environment variable `PANDOCFILTER_CLEANUP` can be set to
+     any non-empty value such as `1` to
+     make sure the directory is created in a temporary location and removed
+     after finishing the filter. In this case there's no caching and files
+     will be regenerated each time the filter is run.
+
+     Example:
+         filename = get_filename4code("myfilter", code)
+     """
+     if os.getenv('PANDOCFILTER_CLEANUP'):
+         imagedir = tempfile.mkdtemp(prefix=module)
+         atexit.register(lambda: shutil.rmtree(imagedir))
+     else:
+         imagedir = module + "-images"
+     fn = hashlib.sha1(content.encode(sys.getfilesystemencoding())).hexdigest()
+     try:
+         os.makedirs(imagedir, exist_ok=True)
+         sys.stderr.write('Created directory ' + imagedir + '\n')
+     except OSError:
+         sys.stderr.write('Could not create directory "' + imagedir + '"\n')
+     if ext:
+         fn += "." + ext
+     return os.path.join(imagedir, fn)
+
+
+ def get_value(kv, key, value=None):
+     """get value from the keyvalues (options)"""
+     res = []
+     for k, v in kv:
+         if k == key:
+             value = v
+         else:
+             res.append([k, v])
+     return value, res
+
+
+ def get_caption(kv):
+     """get caption from the keyvalues (options)
+
+     Example:
+         if key == 'CodeBlock':
+             [[ident, classes, keyvals], code] = value
+             caption, typef, keyvals = get_caption(keyvals)
+             ...
+             return Para([Image([ident, [], keyvals], caption, [filename, typef])])
+     """
+     caption = []
+     typef = ""
+     value, res = get_value(kv, u"caption")
+     if value is not None:
+         caption = [Str(value)]
+         typef = "fig:"
+
+     return caption, typef, res
+
+
+ def get_extension(format, default, **alternates):
+     """get the extension for the result, needs a default and some specialisations
+
+     Example:
+         filetype = get_extension(format, "png", html="svg", latex="eps")
+     """
+     try:
+         return alternates[format]
+     except KeyError:
+         return default
+
+ # end of utilities
+
+
+ def walk(x, action, format, meta):
+     """Walk a tree, applying an action to every object.
+     Returns a modified tree. An action is a function of the form
+     `action(key, value, format, meta)`, where:
+
+     * `key` is the type of the pandoc object (e.g. 'Str', 'Para')
+     * `value` is the contents of the object (e.g. a string for 'Str',
+       a list of inline elements for 'Para')
+     * `format` is the target output format (as supplied by the
+       `format` argument of `walk`)
+     * `meta` is the document's metadata
+
+     The return of an action is either:
+
+     * `None`: this means that the object should remain unchanged
+     * a pandoc object: this will replace the original object
+     * a list of pandoc objects: these will replace the original object; the
+       list is merged with the neighbors of the original objects (spliced into
+       the list the original object belongs to); returning an empty list deletes
+       the object
+     """
+     if isinstance(x, list):
+         array = []
+         for item in x:
+             if isinstance(item, dict) and 't' in item:
+                 res = action(item['t'],
+                              item['c'] if 'c' in item else None, format, meta)
+                 if res is None:
+                     array.append(walk(item, action, format, meta))
+                 elif isinstance(res, list):
+                     for z in res:
+                         array.append(walk(z, action, format, meta))
+                 else:
+                     array.append(walk(res, action, format, meta))
+             else:
+                 array.append(walk(item, action, format, meta))
+         return array
+     elif isinstance(x, dict):
+         return {k: walk(v, action, format, meta) for k, v in x.items()}
+     else:
+         return x
+
+
+ def toJSONFilter(action):
+     """Like `toJSONFilters`, but takes a single action as argument.
+     """
+     toJSONFilters([action])
+
+
+ def toJSONFilters(actions):
+     """Generate a JSON-to-JSON filter from stdin to stdout
+
+     The filter:
+
+     * reads a JSON-formatted pandoc document from stdin
+     * transforms it by walking the tree and performing the actions
+     * returns a new JSON-formatted pandoc document to stdout
+
+     The argument `actions` is a list of functions of the form
+     `action(key, value, format, meta)`, as described in more
+     detail under `walk`.
+
+     This function calls `applyJSONFilters`, with the `format`
+     argument provided by the first command-line argument,
+     if present. (Pandoc sets this by default when calling
+     filters.)
+     """
+     try:
+         input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
+     except AttributeError:
+         # Python 2 does not have sys.stdin.buffer.
+         # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
+         input_stream = codecs.getreader("utf-8")(sys.stdin)
+
+     source = input_stream.read()
+     if len(sys.argv) > 1:
+         format = sys.argv[1]
+     else:
+         format = ""
+
+     sys.stdout.write(applyJSONFilters(actions, source, format))
+
+
+ def applyJSONFilters(actions, source, format=""):
+     """Walk through JSON structure and apply filters
+
+     This:
+
+     * reads a JSON-formatted pandoc document from a source string
+     * transforms it by walking the tree and performing the actions
+     * returns a new JSON-formatted pandoc document as a string
+
+     The `actions` argument is a list of functions (see `walk`
+     for a full description).
+
+     The argument `source` is a string encoded JSON object.
+
+     The argument `format` is a string describing the output format.
+
+     Returns the new JSON-formatted pandoc document.
+     """
+
+     doc = json.loads(source)
+
+     if 'meta' in doc:
+         meta = doc['meta']
+     elif doc[0]:  # old API
+         meta = doc[0]['unMeta']
+     else:
+         meta = {}
+     altered = doc
+     for action in actions:
+         altered = walk(altered, action, format, meta)
+
+     return json.dumps(altered)
+
+
+ def stringify(x):
+     """Walks the tree x and returns concatenated string content,
+     leaving out all formatting.
+     """
+     result = []
+
+     def go(key, val, format, meta):
+         if key in ['Str', 'MetaString']:
+             result.append(val)
+         elif key == 'Code':
+             result.append(val[1])
+         elif key == 'Math':
+             result.append(val[1])
+         elif key == 'LineBreak':
+             result.append(" ")
+         elif key == 'SoftBreak':
+             result.append(" ")
+         elif key == 'Space':
+             result.append(" ")
+
+     walk(x, go, "", {})
+     return ''.join(result)
+
+
+ def attributes(attrs):
+     """Returns an attribute list, constructed from the
+     dictionary attrs.
+     """
+     attrs = attrs or {}
+     ident = attrs.get("id", "")
+     classes = attrs.get("classes", [])
+     keyvals = [[x, attrs[x]] for x in attrs if (x != "classes" and x != "id")]
+     return [ident, classes, keyvals]
+
+
+ def elt(eltType, numargs):
+     def fun(*args):
+         lenargs = len(args)
+         if lenargs != numargs:
+             raise ValueError(eltType + ' expects ' + str(numargs) +
+                              ' arguments, but given ' + str(lenargs))
+         if numargs == 0:
+             xs = []
+         elif len(args) == 1:
+             xs = args[0]
+         else:
+             xs = list(args)
+         return {'t': eltType, 'c': xs}
+     return fun
+
+ # Constructors for block elements
+
+ Plain = elt('Plain', 1)
+ Para = elt('Para', 1)
+ CodeBlock = elt('CodeBlock', 2)
+ RawBlock = elt('RawBlock', 2)
+ BlockQuote = elt('BlockQuote', 1)
+ OrderedList = elt('OrderedList', 2)
+ BulletList = elt('BulletList', 1)
+ DefinitionList = elt('DefinitionList', 1)
+ Header = elt('Header', 3)
+ HorizontalRule = elt('HorizontalRule', 0)
+ Table = elt('Table', 5)
+ Div = elt('Div', 2)
+ Null = elt('Null', 0)
+
+ # Constructors for inline elements
+
+ Str = elt('Str', 1)
+ Emph = elt('Emph', 1)
+ Strong = elt('Strong', 1)
+ Strikeout = elt('Strikeout', 1)
+ Superscript = elt('Superscript', 1)
+ Subscript = elt('Subscript', 1)
+ SmallCaps = elt('SmallCaps', 1)
+ Quoted = elt('Quoted', 2)
+ Cite = elt('Cite', 2)
+ Code = elt('Code', 2)
+ Space = elt('Space', 0)
+ LineBreak = elt('LineBreak', 0)
+ Math = elt('Math', 2)
+ RawInline = elt('RawInline', 2)
+ Link = elt('Link', 3)
+ Image = elt('Image', 3)
+ Note = elt('Note', 1)
+ SoftBreak = elt('SoftBreak', 0)
+ Span = elt('Span', 2)
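Note: a minimal filter built from the helpers above, following the `walk`/`toJSONFilter` docstrings: it upper-cases every Str node. Saved as caps.py (file name illustrative), it would be run as `pandoc input.md --filter ./caps.py`:

from pandocfilters import toJSONFilter, Str

def caps(key, value, format, meta):
    if key == 'Str':
        return Str(value.upper())  # replace the node; returning None leaves it unchanged

if __name__ == "__main__":
    toJSONFilter(caps)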
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv ADDED
File without changes
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth ADDED
@@ -0,0 +1 @@
+ import sys, types, os;has_mfs = sys.version_info > (3, 5);p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('google',));importlib = has_mfs and __import__('importlib.util');has_mfs and __import__('importlib.machinery');m = has_mfs and sys.modules.setdefault('google', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('google', [os.path.dirname(p)])));m = m or sys.modules.setdefault('google', types.ModuleType('google'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py ADDED
@@ -0,0 +1,161 @@
+ # This file is generated by SciPy's build process
+ # It contains system_info results at the time of building this package.
+ from enum import Enum
+
+ __all__ = ["show"]
+ _built_with_meson = True
+
+
+ class DisplayModes(Enum):
+     stdout = "stdout"
+     dicts = "dicts"
+
+
+ def _cleanup(d):
+     """
+     Removes empty values in a `dict` recursively
+     This ensures we remove values that Meson could not provide to CONFIG
+     """
+     if isinstance(d, dict):
+         return { k: _cleanup(v) for k, v in d.items() if v != '' and _cleanup(v) != '' }
+     else:
+         return d
+
+
+ CONFIG = _cleanup(
+     {
+         "Compilers": {
+             "c": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"cc",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "cython": {
+                 "name": r"cython",
+                 "linker": r"cython",
+                 "version": r"3.0.11",
+                 "commands": r"cython",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "c++": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"c++",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "fortran": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"gfortran",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "pythran": {
+                 "version": r"0.16.1",
+                 "include directory": r"../../tmp/pip-build-env-h_xz8lfs/overlay/lib/python3.10/site-packages/pythran"
+             },
+         },
+         "Machine Information": {
+             "host": {
+                 "cpu": r"x86_64",
+                 "family": r"x86_64",
+                 "endian": r"little",
+                 "system": r"linux",
+             },
+             "build": {
+                 "cpu": r"x86_64",
+                 "family": r"x86_64",
+                 "endian": r"little",
+                 "system": r"linux",
+             },
+             "cross-compiled": bool("False".lower().replace('false', '')),
+         },
+         "Build Dependencies": {
+             "blas": {
+                 "name": "scipy-openblas",
+                 "found": bool("True".lower().replace('false', '')),
+                 "version": "0.3.27.dev",
+                 "detection method": "pkgconfig",
+                 "include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
+                 "lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
+                 "openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
+                 "pc file directory": r"/project",
+             },
+             "lapack": {
+                 "name": "scipy-openblas",
+                 "found": bool("True".lower().replace('false', '')),
+                 "version": "0.3.27.dev",
+                 "detection method": "pkgconfig",
+                 "include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
+                 "lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
+                 "openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
+                 "pc file directory": r"/project",
+             },
+             "pybind11": {
+                 "name": "pybind11",
+                 "version": "2.12.0",
+                 "detection method": "config-tool",
+                 "include directory": r"unknown",
+             },
+         },
+         "Python Information": {
+             "path": r"/opt/python/cp310-cp310/bin/python",
+             "version": "3.10",
+         },
+     }
+ )
+
+
+ def _check_pyyaml():
+     import yaml
+
+     return yaml
+
+
+ def show(mode=DisplayModes.stdout.value):
+     """
+     Show libraries and system information on which SciPy was built
+     and is being used
+
+     Parameters
+     ----------
+     mode : {`'stdout'`, `'dicts'`}, optional.
+         Indicates how to display the config information.
+         `'stdout'` prints to console, `'dicts'` returns a dictionary
+         of the configuration.
+
+     Returns
+     -------
+     out : {`dict`, `None`}
+         If mode is `'dicts'`, a dict is returned, else None
+
+     Notes
+     -----
+     1. The `'stdout'` mode will give more readable
+        output if ``pyyaml`` is installed
+
+     """
+     if mode == DisplayModes.stdout.value:
+         try:  # Non-standard library, check import
+             yaml = _check_pyyaml()
+
+             print(yaml.dump(CONFIG))
+         except ModuleNotFoundError:
+             import warnings
+             import json
+
+             warnings.warn("Install `pyyaml` for better output", stacklevel=1)
+             print(json.dumps(CONFIG, indent=2))
+     elif mode == DisplayModes.dicts.value:
+         return CONFIG
+     else:
+         raise AttributeError(
+             f"Invalid `mode`, use one of: {', '.join([e.value for e in DisplayModes])}"
+         )
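Note: the CONFIG recorded above is what `scipy.show_config()` reports; a quick sketch of inspecting it:

import scipy

scipy.show_config()                    # pretty-printed (YAML when pyyaml is installed)
cfg = scipy.show_config(mode="dicts")  # returns the CONFIG dict instead of printing
print(cfg["Build Dependencies"]["blas"]["name"])  # 'scipy-openblas' in this build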
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py ADDED
@@ -0,0 +1,141 @@
+ """
+ SciPy: A scientific computing package for Python
+ ================================================
+
+ Documentation is available in the docstrings and
+ online at https://docs.scipy.org.
+
+ Subpackages
+ -----------
+ Using any of these subpackages requires an explicit import. For example,
+ ``import scipy.cluster``.
+
+ ::
+
+  cluster                      --- Vector Quantization / Kmeans
+  constants                    --- Physical and mathematical constants and units
+  datasets                     --- Dataset methods
+  fft                          --- Discrete Fourier transforms
+  fftpack                      --- Legacy discrete Fourier transforms
+  integrate                    --- Integration routines
+  interpolate                  --- Interpolation Tools
+  io                           --- Data input and output
+  linalg                       --- Linear algebra routines
+  misc                         --- Utilities that don't have another home.
+  ndimage                      --- N-D image package
+  odr                          --- Orthogonal Distance Regression
+  optimize                     --- Optimization Tools
+  signal                       --- Signal Processing Tools
+  sparse                       --- Sparse Matrices
+  spatial                      --- Spatial data structures and algorithms
+  special                      --- Special functions
+  stats                        --- Statistical Functions
+
+ Public API in the main SciPy namespace
+ --------------------------------------
+ ::
+
+  __version__       --- SciPy version string
+  LowLevelCallable  --- Low-level callback function
+  show_config       --- Show scipy build configuration
+  test              --- Run scipy unittests
+
+ """
+
+ import importlib as _importlib
+
+ from numpy import __version__ as __numpy_version__
+
+
+ try:
+     from scipy.__config__ import show as show_config
+ except ImportError as e:
+     msg = """Error importing SciPy: you cannot import SciPy while
+     being in scipy source directory; please exit the SciPy source
+     tree first and relaunch your Python interpreter."""
+     raise ImportError(msg) from e
+
+
+ from scipy.version import version as __version__
+
+
+ # Allow distributors to run custom init code
+ from . import _distributor_init
+ del _distributor_init
+
+
+ from scipy._lib import _pep440
+ # In maintenance branch, change to np_maxversion N+3 if numpy is at N
+ np_minversion = '1.23.5'
+ np_maxversion = '2.3.0'
+ if (_pep440.parse(__numpy_version__) < _pep440.Version(np_minversion) or
+         _pep440.parse(__numpy_version__) >= _pep440.Version(np_maxversion)):
+     import warnings
+     warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
+                   f" is required for this version of SciPy (detected "
+                   f"version {__numpy_version__})",
+                   UserWarning, stacklevel=2)
+ del _pep440
+
+
+ # This is the first import of an extension module within SciPy. If there's
+ # a general issue with the install, such that extension modules are missing
+ # or cannot be imported, this is where we'll get a failure - so give an
+ # informative error message.
+ try:
+     from scipy._lib._ccallback import LowLevelCallable
+ except ImportError as e:
+     msg = "The `scipy` install you are using seems to be broken, " + \
+           "(extension modules cannot be imported), " + \
+           "please try reinstalling."
+     raise ImportError(msg) from e
+
+
+ from scipy._lib._testutils import PytestTester
+ test = PytestTester(__name__)
+ del PytestTester
+
+
+ submodules = [
+     'cluster',
+     'constants',
+     'datasets',
+     'fft',
+     'fftpack',
+     'integrate',
+     'interpolate',
+     'io',
+     'linalg',
+     'misc',
+     'ndimage',
+     'odr',
+     'optimize',
+     'signal',
+     'sparse',
+     'spatial',
+     'special',
+     'stats'
+ ]
+
+ __all__ = submodules + [
+     'LowLevelCallable',
+     'test',
+     'show_config',
+     '__version__',
+ ]
+
+
+ def __dir__():
+     return __all__
+
+
+ def __getattr__(name):
+     if name in submodules:
+         return _importlib.import_module(f'scipy.{name}')
+     else:
+         try:
+             return globals()[name]
+         except KeyError:
+             raise AttributeError(
+                 f"Module 'scipy' has no attribute '{name}'"
+             )
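Note: the module-level `__getattr__` above implements lazy submodule loading: subpackages are imported on first attribute access rather than at `import scipy`. For example:

import scipy

opt = scipy.optimize   # triggers importlib.import_module('scipy.optimize') here
print(scipy.__version__)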
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py ADDED
@@ -0,0 +1,18 @@
+ """ Distributor init file
+
+ Distributors: you can replace the contents of this file with your own custom
+ code to support particular distributions of SciPy.
+
+ For example, this is a good place to put any checks for hardware requirements
+ or BLAS/LAPACK library initialization.
+
+ The SciPy standard source distribution will not put code in this file beyond
+ the try-except import of `_distributor_init_local` (which is not part of a
+ standard source distribution), so you can safely replace this file with your
+ own version.
+ """
+
+ try:
+     from . import _distributor_init_local  # noqa: F401
+ except ImportError:
+     pass
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pytest customization
2
+ import json
3
+ import os
4
+ import warnings
5
+ import tempfile
6
+ from contextlib import contextmanager
7
+
8
+ import numpy as np
9
+ import numpy.testing as npt
10
+ import pytest
11
+ import hypothesis
12
+
13
+ from scipy._lib._fpumode import get_fpu_mode
14
+ from scipy._lib._testutils import FPUModeChangeWarning
15
+ from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_DEVICE
16
+ from scipy._lib import _pep440
17
+
18
+ try:
19
+ from scipy_doctest.conftest import dt_config
20
+ HAVE_SCPDT = True
21
+ except ModuleNotFoundError:
22
+ HAVE_SCPDT = False
23
+
24
+
25
+ def pytest_configure(config):
26
+ config.addinivalue_line("markers",
27
+ "slow: Tests that are very slow.")
28
+ config.addinivalue_line("markers",
29
+ "xslow: mark test as extremely slow (not run unless explicitly requested)")
30
+ config.addinivalue_line("markers",
31
+ "xfail_on_32bit: mark test as failing on 32-bit platforms")
32
+ try:
33
+ import pytest_timeout # noqa:F401
34
+ except Exception:
35
+ config.addinivalue_line(
36
+ "markers", 'timeout: mark a test for a non-default timeout')
37
+ try:
38
+ # This is a more reliable test of whether pytest_fail_slow is installed
39
+ # When I uninstalled it, `import pytest_fail_slow` didn't fail!
40
+ from pytest_fail_slow import parse_duration # type: ignore[import-not-found] # noqa:F401,E501
41
+ except Exception:
42
+ config.addinivalue_line(
43
+ "markers", 'fail_slow: mark a test for a non-default timeout failure')
44
+ config.addinivalue_line("markers",
45
+ "skip_xp_backends(*backends, reasons=None, np_only=False, cpu_only=False): "
46
+ "mark the desired skip configuration for the `skip_xp_backends` fixture.")
47
+
48
+
49
+ def pytest_runtest_setup(item):
50
+ mark = item.get_closest_marker("xslow")
51
+ if mark is not None:
52
+ try:
53
+ v = int(os.environ.get('SCIPY_XSLOW', '0'))
54
+ except ValueError:
55
+ v = False
56
+ if not v:
57
+ pytest.skip("very slow test; "
58
+ "set environment variable SCIPY_XSLOW=1 to run it")
59
+ mark = item.get_closest_marker("xfail_on_32bit")
60
+ if mark is not None and np.intp(0).itemsize < 8:
61
+ pytest.xfail(f'Fails on our 32-bit test platform(s): {mark.args[0]}')
62
+
63
+ # Older versions of threadpoolctl have an issue that may lead to this
64
+ # warning being emitted, see gh-14441
65
+ with npt.suppress_warnings() as sup:
66
+ sup.filter(pytest.PytestUnraisableExceptionWarning)
67
+
68
+ try:
69
+ from threadpoolctl import threadpool_limits
70
+
71
+ HAS_THREADPOOLCTL = True
72
+ except Exception: # observed in gh-14441: (ImportError, AttributeError)
73
+ # Optional dependency only. All exceptions are caught, for robustness
74
+ HAS_THREADPOOLCTL = False
75
+
76
+ if HAS_THREADPOOLCTL:
77
+ # Set the number of openmp threads based on the number of workers
78
+ # xdist is using to prevent oversubscription. Simplified version of what
79
+ # sklearn does (it can rely on threadpoolctl and its builtin OpenMP helper
80
+ # functions)
81
+ try:
82
+ xdist_worker_count = int(os.environ['PYTEST_XDIST_WORKER_COUNT'])
83
+ except KeyError:
84
+ # raises when pytest-xdist is not installed
85
+ return
86
+
87
+ if not os.getenv('OMP_NUM_THREADS'):
88
+ max_openmp_threads = os.cpu_count() // 2 # use nr of physical cores
89
+ threads_per_worker = max(max_openmp_threads // xdist_worker_count, 1)
90
+ try:
91
+ threadpool_limits(threads_per_worker, user_api='blas')
92
+ except Exception:
93
+ # May raise AttributeError for older versions of OpenBLAS.
94
+ # Catch any error for robustness.
95
+ return
96
+
97
+
98
+ @pytest.fixture(scope="function", autouse=True)
99
+ def check_fpu_mode(request):
100
+ """
101
+ Check FPU mode was not changed during the test.
102
+ """
103
+ old_mode = get_fpu_mode()
104
+ yield
105
+ new_mode = get_fpu_mode()
106
+
107
+ if old_mode != new_mode:
108
+ warnings.warn(f"FPU mode changed from {old_mode:#x} to {new_mode:#x} during "
109
+ "the test",
110
+ category=FPUModeChangeWarning, stacklevel=0)
111
+
112
+
113
+ # Array API backend handling
114
+ xp_available_backends = {'numpy': np}
115
+
116
+ if SCIPY_ARRAY_API and isinstance(SCIPY_ARRAY_API, str):
117
+ # fill the dict of backends with available libraries
118
+ try:
119
+ import array_api_strict
120
+ xp_available_backends.update({'array_api_strict': array_api_strict})
121
+ if _pep440.parse(array_api_strict.__version__) < _pep440.Version('2.0'):
122
+ raise ImportError("array-api-strict must be >= version 2.0")
123
+ array_api_strict.set_array_api_strict_flags(
124
+ api_version='2023.12'
125
+ )
126
+ except ImportError:
127
+ pass
128
+
129
+ try:
130
+ import torch # type: ignore[import-not-found]
131
+ xp_available_backends.update({'pytorch': torch})
132
+ # can use `mps` or `cpu`
133
+ torch.set_default_device(SCIPY_DEVICE)
134
+ except ImportError:
135
+ pass
136
+
137
+ try:
138
+ import cupy # type: ignore[import-not-found]
139
+ xp_available_backends.update({'cupy': cupy})
140
+ except ImportError:
141
+ pass
142
+
143
+ try:
144
+ import jax.numpy # type: ignore[import-not-found]
145
+ xp_available_backends.update({'jax.numpy': jax.numpy})
146
+ jax.config.update("jax_enable_x64", True)
147
+ jax.config.update("jax_default_device", jax.devices(SCIPY_DEVICE)[0])
148
+ except ImportError:
149
+ pass
150
+
151
+ # by default, use all available backends
152
+ if SCIPY_ARRAY_API.lower() not in ("1", "true"):
153
+ SCIPY_ARRAY_API_ = json.loads(SCIPY_ARRAY_API)
154
+
155
+ if 'all' in SCIPY_ARRAY_API_:
156
+ pass # same as True
157
+ else:
158
+ # only select a subset of backend by filtering out the dict
159
+ try:
160
+ xp_available_backends = {
161
+ backend: xp_available_backends[backend]
162
+ for backend in SCIPY_ARRAY_API_
163
+ }
164
+ except KeyError:
165
+ msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}"
166
+ raise ValueError(msg)
167
+
168
+ if 'cupy' in xp_available_backends:
169
+ SCIPY_DEVICE = 'cuda'
170
+
171
+ array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values())
172
+
173
+ skip_xp_invalid_arg = pytest.mark.skipif(SCIPY_ARRAY_API,
174
+ reason = ('Test involves masked arrays, object arrays, or other types '
175
+ 'that are not valid input when `SCIPY_ARRAY_API` is used.'))
176
+
177
+
178
+ @pytest.fixture
179
+ def skip_xp_backends(xp, request):
180
+ """
181
+ Skip based on the ``skip_xp_backends`` marker.
182
+
183
+ Parameters
184
+ ----------
185
+ *backends : tuple
186
+ Backends to skip, e.g. ``("array_api_strict", "torch")``.
187
+ These are overridden when ``np_only`` is ``True``, and are not
188
+ necessary to provide for non-CPU backends when ``cpu_only`` is ``True``.
189
+ reasons : list, optional
190
+ A list of reasons for each skip. When ``np_only`` is ``True``,
191
+ this should be a singleton list. Otherwise, this should be a list
192
+ of reasons, one for each corresponding backend in ``backends``.
193
+ If unprovided, default reasons are used. Note that it is not possible
194
+ to specify a custom reason with ``cpu_only``. Default: ``None``.
195
+ np_only : bool, optional
196
+ When ``True``, the test is skipped for all backends other
197
+ than the default NumPy backend. There is no need to provide
198
+ any ``backends`` in this case. To specify a reason, pass a
199
+ singleton list to ``reasons``. Default: ``False``.
200
+ cpu_only : bool, optional
201
+ When ``True``, the test is skipped on non-CPU devices.
202
+ There is no need to provide any ``backends`` in this case,
203
+ but any ``backends`` will also be skipped on the CPU.
204
+ Default: ``False``.
205
+ """
206
+ if "skip_xp_backends" not in request.keywords:
207
+ return
208
+ backends = request.keywords["skip_xp_backends"].args
209
+ kwargs = request.keywords["skip_xp_backends"].kwargs
210
+ np_only = kwargs.get("np_only", False)
211
+ cpu_only = kwargs.get("cpu_only", False)
212
+ if np_only:
213
+ reasons = kwargs.get("reasons", ["do not run with non-NumPy backends."])
214
+ reason = reasons[0]
215
+ if xp.__name__ != 'numpy':
216
+ pytest.skip(reason=reason)
217
+ return
218
+ if cpu_only:
219
+ reason = "do not run with `SCIPY_ARRAY_API` set and not on CPU"
220
+ if SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu':
221
+ if xp.__name__ == 'cupy':
222
+ pytest.skip(reason=reason)
223
+ elif xp.__name__ == 'torch':
224
+ if 'cpu' not in xp.empty(0).device.type:
225
+ pytest.skip(reason=reason)
226
+ elif xp.__name__ == 'jax.numpy':
227
+ for d in xp.empty(0).devices():
228
+ if 'cpu' not in d.device_kind:
229
+ pytest.skip(reason=reason)
230
+
231
+ if backends is not None:
232
+ reasons = kwargs.get("reasons", False)
233
+ for i, backend in enumerate(backends):
234
+ if xp.__name__ == backend:
235
+ if not reasons:
236
+ reason = f"do not run with array API backend: {backend}"
237
+ else:
238
+ reason = reasons[i]
239
+ pytest.skip(reason=reason)
240
+
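The fixture above is driven by the ``skip_xp_backends`` marker documented in its docstring. A minimal sketch of a test module consuming it follows; the test body and skip reason are hypothetical, while the ``array_api_compatible`` marker and the ``usefixtures`` pattern come from this conftest:

import pytest
from scipy.conftest import array_api_compatible

@array_api_compatible              # parametrizes the test over every available `xp` backend
@pytest.mark.usefixtures("skip_xp_backends")
@pytest.mark.skip_xp_backends('array_api_strict',
                              reasons=['hypothetical: example skip reason'])
def test_asarray_roundtrip(xp):
    # `xp` is whichever backend module (numpy, torch, cupy, ...) is active.
    a = xp.asarray([1.0, 2.0, 3.0])
    assert float(a[0]) == 1.0

Tests marked ``xslow`` follow the same opt-in pattern: they run only when ``SCIPY_XSLOW=1`` is set in the environment, as implemented in ``pytest_runtest_setup`` above.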
241
+
242
+ # Following the approach of NumPy's conftest.py...
243
+ # Use a known and persistent tmpdir for hypothesis' caches, which
244
+ # can be automatically cleared by the OS or user.
245
+ hypothesis.configuration.set_hypothesis_home_dir(
246
+ os.path.join(tempfile.gettempdir(), ".hypothesis")
247
+ )
248
+
249
+ # We register two custom profiles for SciPy - for details see
250
+ # https://hypothesis.readthedocs.io/en/latest/settings.html
251
+ # The first is designed for our own CI runs; the latter also
252
+ # forces determinism and is designed for use via scipy.test()
253
+ hypothesis.settings.register_profile(
254
+ name="nondeterministic", deadline=None, print_blob=True,
255
+ )
256
+ hypothesis.settings.register_profile(
257
+ name="deterministic",
258
+ deadline=None, print_blob=True, database=None, derandomize=True,
259
+ suppress_health_check=list(hypothesis.HealthCheck),
260
+ )
261
+
262
+ # Profile is currently set by environment variable `SCIPY_HYPOTHESIS_PROFILE`
263
+ # In the future, it would be good to work the choice into dev.py.
264
+ SCIPY_HYPOTHESIS_PROFILE = os.environ.get("SCIPY_HYPOTHESIS_PROFILE",
265
+ "deterministic")
266
+ hypothesis.settings.load_profile(SCIPY_HYPOTHESIS_PROFILE)
267
+
268
+
269
+ ############################################################################
270
+ # doctesting stuff
271
+
272
+ if HAVE_SCPDT:
273
+
274
+ # FIXME: populate the dict once
275
+ @contextmanager
276
+ def warnings_errors_and_rng(test=None):
277
+ """Temporarily turn (almost) all warnings to errors.
278
+
279
+ Filter out known warnings which we allow.
280
+ """
281
+ known_warnings = dict()
282
+
283
+ # these functions are known to emit "divide by zero" RuntimeWarnings
284
+ divide_by_zero = [
285
+ 'scipy.linalg.norm', 'scipy.ndimage.center_of_mass',
286
+ ]
287
+ for name in divide_by_zero:
288
+ known_warnings[name] = dict(category=RuntimeWarning,
289
+ message='divide by zero')
290
+
291
+ # Deprecated stuff in scipy.signal and elsewhere
292
+ deprecated = [
293
+ 'scipy.signal.cwt', 'scipy.signal.morlet', 'scipy.signal.morlet2',
294
+ 'scipy.signal.ricker',
295
+ 'scipy.integrate.simpson',
296
+ 'scipy.interpolate.interp2d',
297
+ ]
298
+ for name in deprecated:
299
+ known_warnings[name] = dict(category=DeprecationWarning)
300
+
301
+ from scipy import integrate
302
+ # these functions are known to emit IntegrationWarnings
303
+ integration_w = ['scipy.special.ellip_normal',
304
+ 'scipy.special.ellip_harm_2',
305
+ ]
306
+ for name in integration_w:
307
+ known_warnings[name] = dict(category=integrate.IntegrationWarning,
308
+ message='The occurrence of roundoff')
309
+
310
+ # scipy.stats deliberately emits UserWarnings sometimes
311
+ user_w = ['scipy.stats.anderson_ksamp', 'scipy.stats.kurtosistest',
312
+ 'scipy.stats.normaltest', 'scipy.sparse.linalg.norm']
313
+ for name in user_w:
314
+ known_warnings[name] = dict(category=UserWarning)
315
+
316
+ # additional one-off warnings to filter
317
+ dct = {
318
+ 'scipy.sparse.linalg.norm':
319
+ dict(category=UserWarning, message="Exited at iteration"),
320
+ # tutorials
321
+ 'linalg.rst':
322
+ dict(message='the matrix subclass is not',
323
+ category=PendingDeprecationWarning),
324
+ 'stats.rst':
325
+ dict(message='The maximum number of subdivisions',
326
+ category=integrate.IntegrationWarning),
327
+ }
328
+ known_warnings.update(dct)
329
+
330
+ # these legitimately emit warnings in examples
331
+ legit = {'scipy.signal.normalize'}  # a one-element set, not a set of characters
332
+
333
+ # Now, the meat of the matter: filter warnings,
334
+ # also control the random seed for each doctest.
335
+
336
+ # XXX: this matches the refguide-check behavior, but is a tad strange:
337
+ # makes sure that the seed for the old-fashioned np.random* methods is
338
+ # *NOT* reproducible, while the new-style `default_rng()` *IS* reproducible.
339
+ # Should these two be either both repro or both not repro?
340
+
341
+ from scipy._lib._util import _fixed_default_rng
342
+ import numpy as np
343
+ with _fixed_default_rng():
344
+ np.random.seed(None)
345
+ with warnings.catch_warnings():
346
+ if test and test.name in known_warnings:
347
+ warnings.filterwarnings('ignore',
348
+ **known_warnings[test.name])
349
+ yield
350
+ elif test and test.name in legit:
351
+ yield
352
+ else:
353
+ warnings.simplefilter('error', Warning)
354
+ yield
355
+
356
+
357
+ dt_config.user_context_mgr = warnings_errors_and_rng
358
+ dt_config.skiplist = set([
359
+ 'scipy.linalg.LinAlgError', # comes from numpy
360
+ 'scipy.fftpack.fftshift', # fftpack stuff is also from numpy
361
+ 'scipy.fftpack.ifftshift',
362
+ 'scipy.fftpack.fftfreq',
363
+ 'scipy.special.sinc', # sinc is from numpy
364
+ 'scipy.optimize.show_options', # does not have much to doctest
365
+ 'scipy.signal.normalize', # manipulates warnings (XXX temp skip)
366
+ 'scipy.sparse.linalg.norm', # XXX temp skip
367
+ ])
368
+
369
+ # these are affected by NumPy 2.0 scalar repr: rely on string comparison
370
+ if np.__version__ < "2":
371
+ dt_config.skiplist.update(set([
372
+ 'scipy.io.hb_read',
373
+ 'scipy.io.hb_write',
374
+ 'scipy.sparse.csgraph.connected_components',
375
+ 'scipy.sparse.csgraph.depth_first_order',
376
+ 'scipy.sparse.csgraph.shortest_path',
377
+ 'scipy.sparse.csgraph.floyd_warshall',
378
+ 'scipy.sparse.csgraph.dijkstra',
379
+ 'scipy.sparse.csgraph.bellman_ford',
380
+ 'scipy.sparse.csgraph.johnson',
381
+ 'scipy.sparse.csgraph.yen',
382
+ 'scipy.sparse.csgraph.breadth_first_order',
383
+ 'scipy.sparse.csgraph.reverse_cuthill_mckee',
384
+ 'scipy.sparse.csgraph.structural_rank',
385
+ 'scipy.sparse.csgraph.construct_dist_matrix',
386
+ 'scipy.sparse.csgraph.reconstruct_path',
387
+ 'scipy.ndimage.value_indices',
388
+ 'scipy.stats.mstats.describe',
389
+ ]))
390
+
391
+ # help pytest collection a bit: these names are either private
392
+ # (distributions), or just do not need doctesting.
393
+ dt_config.pytest_extra_ignore = [
394
+ "scipy.stats.distributions",
395
+ "scipy.optimize.cython_optimize",
396
+ "scipy.test",
397
+ "scipy.show_config",
398
+ ]
399
+
400
+ dt_config.pytest_extra_xfail = {
401
+ # name: reason
402
+ "io.rst": "",
403
+ "ND_regular_grid.rst": "ReST parser limitation",
404
+ "extrapolation_examples.rst": "ReST parser limitation",
405
+ "sampling_pinv.rst": "__cinit__ unexpected argument",
406
+ "sampling_srou.rst": "nan in scalar_power",
407
+ "probability_distributions.rst": "integration warning",
408
+ }
409
+
410
+ # tutorials
411
+ dt_config.pseudocode = set(['integrate.nquad(func,'])
412
+ dt_config.local_resources = {'io.rst': ["octave_a.mat"]}
413
+ ############################################################################
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd ADDED
@@ -0,0 +1 @@
1
+ from scipy.linalg cimport cython_blas, cython_lapack
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd ADDED
@@ -0,0 +1 @@
1
+ from .optimize cimport cython_optimize
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README ADDED
@@ -0,0 +1,76 @@
1
+ From the website for the L-BFGS-B code (at
2
+ http://www.ece.northwestern.edu/~nocedal/lbfgsb.html):
3
+
4
+ """
5
+ L-BFGS-B is a limited-memory quasi-Newton code for bound-constrained
6
+ optimization, i.e. for problems where the only constraints are of the
7
+ form l<= x <= u.
8
+ """
9
+
10
+ This is a Python wrapper (using F2PY) written by David M. Cooke
11
+ <[email protected]> and released as version 0.9 on April 9, 2004.
12
+ The wrapper was slightly modified by Joonas Paalasmaa for the 3.0 version
13
+ in March 2012.
14
+
15
+ License of L-BFGS-B (Fortran code)
16
+ ==================================
17
+
18
+ The version included here (in lbfgsb.f) is 3.0 (released April 25, 2011). It was
19
+ written by Ciyou Zhu, Richard Byrd, and Jorge Nocedal <[email protected]>. It
20
+ carries the following condition for use:
21
+
22
+ """
23
+ This software is freely available, but we expect that all publications
24
+ describing work using this software, or all commercial products using it,
25
+ quote at least one of the references given below. This software is released
26
+ under the BSD License.
27
+
28
+ References
29
+ * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
30
+ Constrained Optimization, (1995), SIAM Journal on Scientific and
31
+ Statistical Computing, 16, 5, pp. 1190-1208.
32
+ * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
33
+ FORTRAN routines for large scale bound constrained optimization (1997),
34
+ ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
35
+ * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
36
+ FORTRAN routines for large scale bound constrained optimization (2011),
37
+ ACM Transactions on Mathematical Software, 38, 1.
38
+ """
39
+
40
+ The Python wrapper
41
+ ==================
42
+
43
+ This code uses F2PY (http://cens.ioc.ee/projects/f2py2e/) to generate
44
+ the wrapper around the Fortran code.
45
+
46
+ The Python code and wrapper are copyrighted 2004 by David M. Cooke
47
+ <[email protected]>.
48
+
49
+ Example usage
50
+ =============
51
+
52
+ An example of the usage is given at the bottom of the lbfgsb.py file.
53
+ Run it with 'python lbfgsb.py'.
54
+
55
+ License for the Python wrapper
56
+ ==============================
57
+
58
+ Copyright (c) 2004 David M. Cooke <[email protected]>
59
+
60
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
61
+ this software and associated documentation files (the "Software"), to deal in
62
+ the Software without restriction, including without limitation the rights to
63
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
64
+ of the Software, and to permit persons to whom the Software is furnished to do
65
+ so, subject to the following conditions:
66
+
67
+ The above copyright notice and this permission notice shall be included in all
68
+ copies or substantial portions of the Software.
69
+
70
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
71
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
72
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
73
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
74
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
75
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
76
+ SOFTWARE.
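The README defers to an example at the bottom of ``lbfgsb.py``. For orientation, here is a hedged sketch of the same routine through its modern entry point, ``scipy.optimize.fmin_l_bfgs_b``; the objective, bounds, and starting point are invented for the example:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f_and_grad(x):
    # Convex quadratic: f(x) = (x0 - 1)^2 + (x1 + 2)^2, returned with its gradient.
    f = (x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2
    g = np.array([2.0 * (x[0] - 1.0), 2.0 * (x[1] + 2.0)])
    return f, g

# Bound constraints of the form l <= x <= u, as in the quoted blurb above.
bounds = [(0.0, 5.0), (0.0, 5.0)]
x_opt, f_opt, info = fmin_l_bfgs_b(f_and_grad, np.array([3.0, 3.0]), bounds=bounds)
print(x_opt, f_opt)  # expect x near [1.0, 0.0]: the unconstrained minimum projected onto the bounds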
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py ADDED
@@ -0,0 +1,452 @@
1
+ """
2
+ =====================================================
3
+ Optimization and root finding (:mod:`scipy.optimize`)
4
+ =====================================================
5
+
6
+ .. currentmodule:: scipy.optimize
7
+
8
+ .. toctree::
9
+ :hidden:
10
+
11
+ optimize.cython_optimize
12
+
13
+ SciPy ``optimize`` provides functions for minimizing (or maximizing)
14
+ objective functions, possibly subject to constraints. It includes
15
+ solvers for nonlinear problems (with support for both local and global
16
+ optimization algorithms), linear programming, constrained
17
+ and nonlinear least-squares, root finding, and curve fitting.
18
+
19
+ Common functions and objects, shared across different solvers, are:
20
+
21
+ .. autosummary::
22
+ :toctree: generated/
23
+
24
+ show_options - Show specific options for optimization solvers.
25
+ OptimizeResult - The optimization result returned by some optimizers.
26
+ OptimizeWarning - The optimization encountered problems.
27
+
28
+
29
+ Optimization
30
+ ============
31
+
32
+ Scalar functions optimization
33
+ -----------------------------
34
+
35
+ .. autosummary::
36
+ :toctree: generated/
37
+
38
+ minimize_scalar - Interface for minimizers of univariate functions
39
+
40
+ The `minimize_scalar` function supports the following methods:
41
+
42
+ .. toctree::
43
+
44
+ optimize.minimize_scalar-brent
45
+ optimize.minimize_scalar-bounded
46
+ optimize.minimize_scalar-golden
47
+
48
+ Local (multivariate) optimization
49
+ ---------------------------------
50
+
51
+ .. autosummary::
52
+ :toctree: generated/
53
+
54
+ minimize - Interface for minimizers of multivariate functions.
55
+
56
+ The `minimize` function supports the following methods:
57
+
58
+ .. toctree::
59
+
60
+ optimize.minimize-neldermead
61
+ optimize.minimize-powell
62
+ optimize.minimize-cg
63
+ optimize.minimize-bfgs
64
+ optimize.minimize-newtoncg
65
+ optimize.minimize-lbfgsb
66
+ optimize.minimize-tnc
67
+ optimize.minimize-cobyla
68
+ optimize.minimize-cobyqa
69
+ optimize.minimize-slsqp
70
+ optimize.minimize-trustconstr
71
+ optimize.minimize-dogleg
72
+ optimize.minimize-trustncg
73
+ optimize.minimize-trustkrylov
74
+ optimize.minimize-trustexact
75
+
76
+ Constraints are passed to `minimize` function as a single object or
77
+ as a list of objects from the following classes:
78
+
79
+ .. autosummary::
80
+ :toctree: generated/
81
+
82
+ NonlinearConstraint - Class defining general nonlinear constraints.
83
+ LinearConstraint - Class defining general linear constraints.
84
+
85
+ Simple bound constraints are handled separately and there is a special class
86
+ for them:
87
+
88
+ .. autosummary::
89
+ :toctree: generated/
90
+
91
+ Bounds - Bound constraints.
92
+
93
+ Quasi-Newton strategies implementing `HessianUpdateStrategy`
94
+ interface can be used to approximate the Hessian in `minimize`
95
+ function (available only for the 'trust-constr' method). Available
96
+ quasi-Newton methods implementing this interface are:
97
+
98
+ .. autosummary::
99
+ :toctree: generated/
100
+
101
+ BFGS - Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
102
+ SR1 - Symmetric-rank-1 Hessian update strategy.
103
+
104
+ .. _global_optimization:
105
+
106
+ Global optimization
107
+ -------------------
108
+
109
+ .. autosummary::
110
+ :toctree: generated/
111
+
112
+ basinhopping - Basinhopping stochastic optimizer.
113
+ brute - Brute force searching optimizer.
114
+ differential_evolution - Stochastic optimizer using differential evolution.
115
+
116
+ shgo - Simplicial homology global optimizer.
117
+ dual_annealing - Dual annealing stochastic optimizer.
118
+ direct - DIRECT (Dividing Rectangles) optimizer.
119
+
120
+ Least-squares and curve fitting
121
+ ===============================
122
+
123
+ Nonlinear least-squares
124
+ -----------------------
125
+
126
+ .. autosummary::
127
+ :toctree: generated/
128
+
129
+ least_squares - Solve a nonlinear least-squares problem with bounds on the variables.
130
+
131
+ Linear least-squares
132
+ --------------------
133
+
134
+ .. autosummary::
135
+ :toctree: generated/
136
+
137
+ nnls - Linear least-squares problem with non-negativity constraint.
138
+ lsq_linear - Linear least-squares problem with bound constraints.
139
+ isotonic_regression - Least squares problem of isotonic regression via PAVA.
140
+
141
+ Curve fitting
142
+ -------------
143
+
144
+ .. autosummary::
145
+ :toctree: generated/
146
+
147
+ curve_fit -- Fit curve to a set of points.
148
+
149
+ Root finding
150
+ ============
151
+
152
+ Scalar functions
153
+ ----------------
154
+ .. autosummary::
155
+ :toctree: generated/
156
+
157
+ root_scalar - Unified interface for nonlinear solvers of scalar functions.
158
+ brentq - quadratic interpolation Brent method.
159
+ brenth - Brent method, modified by Harris with hyperbolic extrapolation.
160
+ ridder - Ridder's method.
161
+ bisect - Bisection method.
162
+ newton - Newton's method (also Secant and Halley's methods).
163
+ toms748 - Alefeld, Potra & Shi Algorithm 748.
164
+ RootResults - The root finding result returned by some root finders.
165
+
166
+ The `root_scalar` function supports the following methods:
167
+
168
+ .. toctree::
169
+
170
+ optimize.root_scalar-brentq
171
+ optimize.root_scalar-brenth
172
+ optimize.root_scalar-bisect
173
+ optimize.root_scalar-ridder
174
+ optimize.root_scalar-newton
175
+ optimize.root_scalar-toms748
176
+ optimize.root_scalar-secant
177
+ optimize.root_scalar-halley
178
+
179
+
180
+
181
+ The table below lists situations and appropriate methods, along with
182
+ *asymptotic* convergence rates per iteration (and per function evaluation)
183
+ for successful convergence to a simple root(*).
184
+ Bisection is the slowest of them all, adding one bit of accuracy for each
185
+ function evaluation, but is guaranteed to converge.
186
+ The other bracketing methods all (eventually) increase the number of accurate
187
+ bits by about 50% for every function evaluation.
188
+ The derivative-based methods, all built on `newton`, can converge quite quickly
189
+ if the initial value is close to the root. They can also be applied to
190
+ functions defined on (a subset of) the complex plane.
191
+
192
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
193
+ | Domain of f | Bracket? | Derivatives? | Solvers | Convergence |
194
+ + + +----------+-----------+ +-------------+----------------+
195
+ | | | `fprime` | `fprime2` | | Guaranteed? | Rate(s)(*) |
196
+ +=============+==========+==========+===========+=============+=============+================+
197
+ | `R` | Yes | N/A | N/A | - bisection | - Yes | - 1 "Linear" |
198
+ | | | | | - brentq | - Yes | - >=1, <= 1.62 |
199
+ | | | | | - brenth | - Yes | - >=1, <= 1.62 |
200
+ | | | | | - ridder | - Yes | - 2.0 (1.41) |
201
+ | | | | | - toms748 | - Yes | - 2.7 (1.65) |
202
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
203
+ | `R` or `C` | No | No | No | secant | No | 1.62 (1.62) |
204
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
205
+ | `R` or `C` | No | Yes | No | newton | No | 2.00 (1.41) |
206
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
207
+ | `R` or `C` | No | Yes | Yes | halley | No | 3.00 (1.44) |
208
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
209
+
210
+ .. seealso::
211
+
212
+ `scipy.optimize.cython_optimize` -- Typed Cython versions of root finding functions
213
+
214
+ Fixed point finding:
215
+
216
+ .. autosummary::
217
+ :toctree: generated/
218
+
219
+ fixed_point - Single-variable fixed-point solver.
220
+
221
+ Multidimensional
222
+ ----------------
223
+
224
+ .. autosummary::
225
+ :toctree: generated/
226
+
227
+ root - Unified interface for nonlinear solvers of multivariate functions.
228
+
229
+ The `root` function supports the following methods:
230
+
231
+ .. toctree::
232
+
233
+ optimize.root-hybr
234
+ optimize.root-lm
235
+ optimize.root-broyden1
236
+ optimize.root-broyden2
237
+ optimize.root-anderson
238
+ optimize.root-linearmixing
239
+ optimize.root-diagbroyden
240
+ optimize.root-excitingmixing
241
+ optimize.root-krylov
242
+ optimize.root-dfsane
243
+
244
+ Linear programming / MILP
245
+ =========================
246
+
247
+ .. autosummary::
248
+ :toctree: generated/
249
+
250
+ milp -- Mixed integer linear programming.
251
+ linprog -- Unified interface for minimizers of linear programming problems.
252
+
253
+ The `linprog` function supports the following methods:
254
+
255
+ .. toctree::
256
+
257
+ optimize.linprog-simplex
258
+ optimize.linprog-interior-point
259
+ optimize.linprog-revised_simplex
260
+ optimize.linprog-highs-ipm
261
+ optimize.linprog-highs-ds
262
+ optimize.linprog-highs
263
+
264
+ The simplex, interior-point, and revised simplex methods support callback
265
+ functions, such as:
266
+
267
+ .. autosummary::
268
+ :toctree: generated/
269
+
270
+ linprog_verbose_callback -- Sample callback function for linprog (simplex).
271
+
272
+ Assignment problems
273
+ ===================
274
+
275
+ .. autosummary::
276
+ :toctree: generated/
277
+
278
+ linear_sum_assignment -- Solves the linear-sum assignment problem.
279
+ quadratic_assignment -- Solves the quadratic assignment problem.
280
+
281
+ The `quadratic_assignment` function supports the following methods:
282
+
283
+ .. toctree::
284
+
285
+ optimize.qap-faq
286
+ optimize.qap-2opt
287
+
288
+ Utilities
289
+ =========
290
+
291
+ Finite-difference approximation
292
+ -------------------------------
293
+
294
+ .. autosummary::
295
+ :toctree: generated/
296
+
297
+ approx_fprime - Approximate the gradient of a scalar function.
298
+ check_grad - Check the supplied derivative using finite differences.
299
+
300
+
301
+ Line search
302
+ -----------
303
+
304
+ .. autosummary::
305
+ :toctree: generated/
306
+
307
+ bracket - Bracket a minimum, given two starting points.
308
+ line_search - Return a step that satisfies the strong Wolfe conditions.
309
+
310
+ Hessian approximation
311
+ ---------------------
312
+
313
+ .. autosummary::
314
+ :toctree: generated/
315
+
316
+ LbfgsInvHessProduct - Linear operator for L-BFGS approximate inverse Hessian.
317
+ HessianUpdateStrategy - Interface for implementing Hessian update strategies
318
+
319
+ Benchmark problems
320
+ ------------------
321
+
322
+ .. autosummary::
323
+ :toctree: generated/
324
+
325
+ rosen - The Rosenbrock function.
326
+ rosen_der - The derivative of the Rosenbrock function.
327
+ rosen_hess - The Hessian matrix of the Rosenbrock function.
328
+ rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
329
+
330
+ Legacy functions
331
+ ================
332
+
333
+ The functions below are not recommended for use in new scripts;
334
+ all of these methods are accessible via a newer, more consistent
335
+ interface provided by the functions above.
336
+
337
+ Optimization
338
+ ------------
339
+
340
+ General-purpose multivariate methods:
341
+
342
+ .. autosummary::
343
+ :toctree: generated/
344
+
345
+ fmin - Nelder-Mead Simplex algorithm.
346
+ fmin_powell - Powell's (modified) conjugate direction method.
347
+ fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm.
348
+ fmin_bfgs - Quasi-Newton method (Broyden-Fletcher-Goldfarb-Shanno).
349
+ fmin_ncg - Line-search Newton Conjugate Gradient.
350
+
351
+ Constrained multivariate methods:
352
+
353
+ .. autosummary::
354
+ :toctree: generated/
355
+
356
+ fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer.
357
+ fmin_tnc - Truncated Newton code.
358
+ fmin_cobyla - Constrained optimization by linear approximation.
359
+ fmin_slsqp - Minimization using sequential least-squares programming.
360
+
361
+ Univariate (scalar) minimization methods:
362
+
363
+ .. autosummary::
364
+ :toctree: generated/
365
+
366
+ fminbound - Bounded minimization of a scalar function.
367
+ brent - 1-D function minimization using Brent method.
368
+ golden - 1-D function minimization using Golden Section method.
369
+
370
+ Least-squares
371
+ -------------
372
+
373
+ .. autosummary::
374
+ :toctree: generated/
375
+
376
+ leastsq - Minimize the sum of squares of M equations in N unknowns.
377
+
378
+ Root finding
379
+ ------------
380
+
381
+ General nonlinear solvers:
382
+
383
+ .. autosummary::
384
+ :toctree: generated/
385
+
386
+ fsolve - Non-linear multivariable equation solver.
387
+ broyden1 - Broyden's first method.
388
+ broyden2 - Broyden's second method.
389
+ NoConvergence - Exception raised when nonlinear solver does not converge.
390
+
391
+ Large-scale nonlinear solvers:
392
+
393
+ .. autosummary::
394
+ :toctree: generated/
395
+
396
+ newton_krylov
397
+ anderson
398
+
399
+ BroydenFirst
400
+ InverseJacobian
401
+ KrylovJacobian
402
+
403
+ Simple iteration solvers:
404
+
405
+ .. autosummary::
406
+ :toctree: generated/
407
+
408
+ excitingmixing
409
+ linearmixing
410
+ diagbroyden
411
+
412
+ """ # noqa: E501
413
+
414
+ from ._optimize import *
415
+ from ._minimize import *
416
+ from ._root import *
417
+ from ._root_scalar import *
418
+ from ._minpack_py import *
419
+ from ._zeros_py import *
420
+ from ._lbfgsb_py import fmin_l_bfgs_b, LbfgsInvHessProduct
421
+ from ._tnc import fmin_tnc
422
+ from ._cobyla_py import fmin_cobyla
423
+ from ._nonlin import *
424
+ from ._slsqp_py import fmin_slsqp
425
+ from ._nnls import nnls
426
+ from ._basinhopping import basinhopping
427
+ from ._linprog import linprog, linprog_verbose_callback
428
+ from ._lsap import linear_sum_assignment
429
+ from ._differentialevolution import differential_evolution
430
+ from ._lsq import least_squares, lsq_linear
431
+ from ._isotonic import isotonic_regression
432
+ from ._constraints import (NonlinearConstraint,
433
+ LinearConstraint,
434
+ Bounds)
435
+ from ._hessian_update_strategy import HessianUpdateStrategy, BFGS, SR1
436
+ from ._shgo import shgo
437
+ from ._dual_annealing import dual_annealing
438
+ from ._qap import quadratic_assignment
439
+ from ._direct_py import direct
440
+ from ._milp import milp
441
+
442
+ # Deprecated namespaces, to be removed in v2.0.0
443
+ from . import (
444
+ cobyla, lbfgsb, linesearch, minpack, minpack2, moduleTNC, nonlin, optimize,
445
+ slsqp, tnc, zeros
446
+ )
447
+
448
+ __all__ = [s for s in dir() if not s.startswith('_')]
449
+
450
+ from scipy._lib._testutils import PytestTester
451
+ test = PytestTester(__name__)
452
+ del PytestTester
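As a quick illustration of the unified interface documented in the module docstring, here is a hedged sketch minimizing the bundled Rosenbrock test function under box constraints; the starting point and method choice are arbitrary:

import numpy as np
from scipy.optimize import Bounds, minimize, rosen, rosen_der

x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])   # arbitrary starting point
bounds = Bounds(lb=0.0, ub=2.0)             # scalar bounds are broadcast to every variable

res = minimize(rosen, x0, jac=rosen_der, method="L-BFGS-B", bounds=bounds)
print(res.x)    # approaches the global minimum at all-ones
print(res.fun)  # objective value at the solution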
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py ADDED
@@ -0,0 +1,753 @@
1
+ """
2
+ basinhopping: The basinhopping global optimization algorithm
3
+ """
4
+ import numpy as np
5
+ import math
6
+ import inspect
7
+ import scipy.optimize
8
+ from scipy._lib._util import check_random_state
9
+
10
+ __all__ = ['basinhopping']
11
+
12
+
13
+ _params = (inspect.Parameter('res_new', kind=inspect.Parameter.KEYWORD_ONLY),
14
+ inspect.Parameter('res_old', kind=inspect.Parameter.KEYWORD_ONLY))
15
+ _new_accept_test_signature = inspect.Signature(parameters=_params)
16
+
17
+
18
+ class Storage:
19
+ """
20
+ Class used to store the lowest energy structure
21
+ """
22
+ def __init__(self, minres):
23
+ self._add(minres)
24
+
25
+ def _add(self, minres):
26
+ self.minres = minres
27
+ self.minres.x = np.copy(minres.x)
28
+
29
+ def update(self, minres):
30
+ if minres.success and (minres.fun < self.minres.fun
31
+ or not self.minres.success):
32
+ self._add(minres)
33
+ return True
34
+ else:
35
+ return False
36
+
37
+ def get_lowest(self):
38
+ return self.minres
39
+
40
+
41
+ class BasinHoppingRunner:
42
+ """This class implements the core of the basinhopping algorithm.
43
+
44
+ x0 : ndarray
45
+ The starting coordinates.
46
+ minimizer : callable
47
+ The local minimizer, with signature ``result = minimizer(x)``.
48
+ The return value is an `optimize.OptimizeResult` object.
49
+ step_taking : callable
50
+ This function displaces the coordinates randomly. Signature should
51
+ be ``x_new = step_taking(x)``. Note that `x` may be modified in-place.
52
+ accept_tests : list of callables
53
+ Each test is passed the kwargs `f_new`, `x_new`, `f_old` and
54
+ `x_old`. These tests will be used to judge whether or not to accept
55
+ the step. The acceptable return values are True, False, or ``"force
56
+ accept"``. If any of the tests return False then the step is rejected.
57
+ If ``"force accept"``, then this will override any other tests in
58
+ order to accept the step. This can be used, for example, to forcefully
59
+ escape from a local minimum that ``basinhopping`` is trapped in.
60
+ disp : bool, optional
61
+ Display status messages.
62
+
63
+ """
64
+ def __init__(self, x0, minimizer, step_taking, accept_tests, disp=False):
65
+ self.x = np.copy(x0)
66
+ self.minimizer = minimizer
67
+ self.step_taking = step_taking
68
+ self.accept_tests = accept_tests
69
+ self.disp = disp
70
+
71
+ self.nstep = 0
72
+
73
+ # initialize return object
74
+ self.res = scipy.optimize.OptimizeResult()
75
+ self.res.minimization_failures = 0
76
+
77
+ # do initial minimization
78
+ minres = minimizer(self.x)
79
+ if not minres.success:
80
+ self.res.minimization_failures += 1
81
+ if self.disp:
82
+ print("warning: basinhopping: local minimization failure")
83
+ self.x = np.copy(minres.x)
84
+ self.energy = minres.fun
85
+ self.incumbent_minres = minres # best minimize result found so far
86
+ if self.disp:
87
+ print("basinhopping step %d: f %g" % (self.nstep, self.energy))
88
+
89
+ # initialize storage class
90
+ self.storage = Storage(minres)
91
+
92
+ if hasattr(minres, "nfev"):
93
+ self.res.nfev = minres.nfev
94
+ if hasattr(minres, "njev"):
95
+ self.res.njev = minres.njev
96
+ if hasattr(minres, "nhev"):
97
+ self.res.nhev = minres.nhev
98
+
99
+ def _monte_carlo_step(self):
100
+ """Do one Monte Carlo iteration
101
+
102
+ Randomly displace the coordinates, minimize, and decide whether
103
+ or not to accept the new coordinates.
104
+ """
105
+ # Take a random step. Make a copy of x because the step_taking
106
+ # algorithm might change x in place
107
+ x_after_step = np.copy(self.x)
108
+ x_after_step = self.step_taking(x_after_step)
109
+
110
+ # do a local minimization
111
+ minres = self.minimizer(x_after_step)
112
+ x_after_quench = minres.x
113
+ energy_after_quench = minres.fun
114
+ if not minres.success:
115
+ self.res.minimization_failures += 1
116
+ if self.disp:
117
+ print("warning: basinhopping: local minimization failure")
118
+ if hasattr(minres, "nfev"):
119
+ self.res.nfev += minres.nfev
120
+ if hasattr(minres, "njev"):
121
+ self.res.njev += minres.njev
122
+ if hasattr(minres, "nhev"):
123
+ self.res.nhev += minres.nhev
124
+
125
+ # accept the move based on self.accept_tests. If any test is False,
126
+ # then reject the step. If any test returns the special string
127
+ # 'force accept', then accept the step regardless. This can be used
128
+ # to forcefully escape from a local minimum if normal basin hopping
129
+ # steps are not sufficient.
130
+ accept = True
131
+ for test in self.accept_tests:
132
+ if inspect.signature(test) == _new_accept_test_signature:
133
+ testres = test(res_new=minres, res_old=self.incumbent_minres)
134
+ else:
135
+ testres = test(f_new=energy_after_quench, x_new=x_after_quench,
136
+ f_old=self.energy, x_old=self.x)
137
+
138
+ if testres == 'force accept':
139
+ accept = True
140
+ break
141
+ elif testres is None:
142
+ raise ValueError("accept_tests must return True, False, or "
143
+ "'force accept'")
144
+ elif not testres:
145
+ accept = False
146
+
147
+ # Report the result of the acceptance test to the take step class.
148
+ # This is for adaptive step taking
149
+ if hasattr(self.step_taking, "report"):
150
+ self.step_taking.report(accept, f_new=energy_after_quench,
151
+ x_new=x_after_quench, f_old=self.energy,
152
+ x_old=self.x)
153
+
154
+ return accept, minres
155
+
156
+ def one_cycle(self):
157
+ """Do one cycle of the basinhopping algorithm
158
+ """
159
+ self.nstep += 1
160
+ new_global_min = False
161
+
162
+ accept, minres = self._monte_carlo_step()
163
+
164
+ if accept:
165
+ self.energy = minres.fun
166
+ self.x = np.copy(minres.x)
167
+ self.incumbent_minres = minres # best minimize result found so far
168
+ new_global_min = self.storage.update(minres)
169
+
170
+ # print some information
171
+ if self.disp:
172
+ self.print_report(minres.fun, accept)
173
+ if new_global_min:
174
+ print("found new global minimum on step %d with function"
175
+ " value %g" % (self.nstep, self.energy))
176
+
177
+ # save some variables as BasinHoppingRunner attributes
178
+ self.xtrial = minres.x
179
+ self.energy_trial = minres.fun
180
+ self.accept = accept
181
+
182
+ return new_global_min
183
+
184
+ def print_report(self, energy_trial, accept):
185
+ """print a status update"""
186
+ minres = self.storage.get_lowest()
187
+ print("basinhopping step %d: f %g trial_f %g accepted %d "
188
+ " lowest_f %g" % (self.nstep, self.energy, energy_trial,
189
+ accept, minres.fun))
190
+
191
+
192
+ class AdaptiveStepsize:
193
+ """
194
+ Class to implement adaptive stepsize.
195
+
196
+ This class wraps the step taking class and modifies the stepsize to
197
+ ensure the true acceptance rate is as close as possible to the target.
198
+
199
+ Parameters
200
+ ----------
201
+ takestep : callable
202
+ The step taking routine. Must contain modifiable attribute
203
+ takestep.stepsize
204
+ accept_rate : float, optional
205
+ The target step acceptance rate
206
+ interval : int, optional
207
+ Interval for how often to update the stepsize
208
+ factor : float, optional
209
+ The step size is multiplied or divided by this factor upon each
210
+ update.
211
+ verbose : bool, optional
212
+ Print information about each update
213
+
214
+ """
215
+ def __init__(self, takestep, accept_rate=0.5, interval=50, factor=0.9,
216
+ verbose=True):
217
+ self.takestep = takestep
218
+ self.target_accept_rate = accept_rate
219
+ self.interval = interval
220
+ self.factor = factor
221
+ self.verbose = verbose
222
+
223
+ self.nstep = 0
224
+ self.nstep_tot = 0
225
+ self.naccept = 0
226
+
227
+ def __call__(self, x):
228
+ return self.take_step(x)
229
+
230
+ def _adjust_step_size(self):
231
+ old_stepsize = self.takestep.stepsize
232
+ accept_rate = float(self.naccept) / self.nstep
233
+ if accept_rate > self.target_accept_rate:
234
+ # We're accepting too many steps. This generally means we're
235
+ # trapped in a basin. Take bigger steps.
236
+ self.takestep.stepsize /= self.factor
237
+ else:
238
+ # We're not accepting enough steps. Take smaller steps.
239
+ self.takestep.stepsize *= self.factor
240
+ if self.verbose:
241
+ print(f"adaptive stepsize: acceptance rate {accept_rate:f} target "
242
+ f"{self.target_accept_rate:f} new stepsize "
243
+ f"{self.takestep.stepsize:g} old stepsize {old_stepsize:g}")
244
+
245
+ def take_step(self, x):
246
+ self.nstep += 1
247
+ self.nstep_tot += 1
248
+ if self.nstep % self.interval == 0:
249
+ self._adjust_step_size()
250
+ return self.takestep(x)
251
+
252
+ def report(self, accept, **kwargs):
253
+ "called by basinhopping to report the result of the step"
254
+ if accept:
255
+ self.naccept += 1
256
+
257
+
258
+ class RandomDisplacement:
259
+ """Add a random displacement of maximum size `stepsize` to each coordinate.
260
+
261
+ Calling this updates `x` in-place.
262
+
263
+ Parameters
264
+ ----------
265
+ stepsize : float, optional
266
+ Maximum stepsize in any dimension
267
+ random_gen : {None, int, `numpy.random.Generator`,
268
+ `numpy.random.RandomState`}, optional
269
+
270
+ If `random_gen` is None (or `np.random`), the `numpy.random.RandomState`
271
+ singleton is used.
272
+ If `random_gen` is an int, a new ``RandomState`` instance is used,
273
+ seeded with `random_gen`.
274
+ If `random_gen` is already a ``Generator`` or ``RandomState`` instance
275
+ then that instance is used.
276
+
277
+ """
278
+
279
+ def __init__(self, stepsize=0.5, random_gen=None):
280
+ self.stepsize = stepsize
281
+ self.random_gen = check_random_state(random_gen)
282
+
283
+ def __call__(self, x):
284
+ x += self.random_gen.uniform(-self.stepsize, self.stepsize,
285
+ np.shape(x))
286
+ return x
287
+
288
+
289
+ class MinimizerWrapper:
290
+ """
291
+ wrap a minimizer function as a minimizer class
292
+ """
293
+ def __init__(self, minimizer, func=None, **kwargs):
294
+ self.minimizer = minimizer
295
+ self.func = func
296
+ self.kwargs = kwargs
297
+
298
+ def __call__(self, x0):
299
+ if self.func is None:
300
+ return self.minimizer(x0, **self.kwargs)
301
+ else:
302
+ return self.minimizer(self.func, x0, **self.kwargs)
303
+
304
+
305
+ class Metropolis:
306
+ """Metropolis acceptance criterion.
307
+
308
+ Parameters
309
+ ----------
310
+ T : float
311
+ The "temperature" parameter for the accept or reject criterion.
312
+ random_gen : {None, int, `numpy.random.Generator`,
313
+ `numpy.random.RandomState`}, optional
314
+
315
+ If `random_gen` is None (or `np.random`), the `numpy.random.RandomState`
316
+ singleton is used.
317
+ If `random_gen` is an int, a new ``RandomState`` instance is used,
318
+ seeded with `random_gen`.
319
+ If `random_gen` is already a ``Generator`` or ``RandomState`` instance
320
+ then that instance is used.
321
+ Random number generator used for acceptance test.
322
+
323
+ """
324
+
325
+ def __init__(self, T, random_gen=None):
326
+ # Avoid ZeroDivisionError since "MBH can be regarded as a special case
327
+ # of the BH framework with the Metropolis criterion, where temperature
328
+ # T = 0." (Reject all steps that increase energy.)
329
+ self.beta = 1.0 / T if T != 0 else float('inf')
330
+ self.random_gen = check_random_state(random_gen)
331
+
332
+ def accept_reject(self, res_new, res_old):
333
+ """
334
+ Assuming the local search underlying res_new was successful:
335
+ If new energy is lower than old, it will always be accepted.
336
+ If new is higher than old, there is a chance it will be accepted,
337
+ less likely for larger differences.
338
+ """
339
+ with np.errstate(invalid='ignore'):
340
+ # The energy values being fed to Metropolis are 1-length arrays, and if
341
+ # they are equal, their difference is 0, which gets multiplied by beta,
342
+ # which is inf, and array([0]) * float('inf') causes
343
+ #
344
+ # RuntimeWarning: invalid value encountered in multiply
345
+ #
346
+ # Ignore this warning so when the algorithm is on a flat plane, it always
347
+ # accepts the step, to try to move off the plane.
348
+ prod = -(res_new.fun - res_old.fun) * self.beta
349
+ w = math.exp(min(0, prod))
350
+
351
+ rand = self.random_gen.uniform()
352
+ return w >= rand and (res_new.success or not res_old.success)
353
+
354
+ def __call__(self, *, res_new, res_old):
355
+ """
356
+ f_new and f_old are mandatory in kwargs
357
+ """
358
+ return bool(self.accept_reject(res_new, res_old))
359
+
360
+
361
+ def basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5,
362
+ minimizer_kwargs=None, take_step=None, accept_test=None,
363
+ callback=None, interval=50, disp=False, niter_success=None,
364
+ seed=None, *, target_accept_rate=0.5, stepwise_factor=0.9):
365
+ """Find the global minimum of a function using the basin-hopping algorithm.
366
+
367
+ Basin-hopping is a two-phase method that combines a global stepping
368
+ algorithm with local minimization at each step. Designed to mimic
369
+ the natural process of energy minimization of clusters of atoms, it works
370
+ well for similar problems with "funnel-like, but rugged" energy landscapes
371
+ [5]_.
372
+
373
+ As the step-taking, step acceptance, and minimization methods are all
374
+ customizable, this function can also be used to implement other two-phase
375
+ methods.
376
+
377
+ Parameters
378
+ ----------
379
+ func : callable ``f(x, *args)``
380
+ Function to be optimized. ``args`` can be passed as an optional item
381
+ in the dict `minimizer_kwargs`
382
+ x0 : array_like
383
+ Initial guess.
384
+ niter : integer, optional
385
+ The number of basin-hopping iterations. There will be a total of
386
+ ``niter + 1`` runs of the local minimizer.
387
+ T : float, optional
388
+ The "temperature" parameter for the acceptance or rejection criterion.
389
+ Higher "temperatures" mean that larger jumps in function value will be
390
+ accepted. For best results `T` should be comparable to the
391
+ separation (in function value) between local minima.
392
+ stepsize : float, optional
393
+ Maximum step size for use in the random displacement.
394
+ minimizer_kwargs : dict, optional
395
+ Extra keyword arguments to be passed to the local minimizer
396
+ `scipy.optimize.minimize` Some important options could be:
397
+
398
+ method : str
399
+ The minimization method (e.g. ``"L-BFGS-B"``)
400
+ args : tuple
401
+ Extra arguments passed to the objective function (`func`) and
402
+ its derivatives (Jacobian, Hessian).
403
+
404
+ take_step : callable ``take_step(x)``, optional
405
+ Replace the default step-taking routine with this routine. The default
406
+ step-taking routine is a random displacement of the coordinates, but
407
+ other step-taking algorithms may be better for some systems.
408
+ `take_step` can optionally have the attribute ``take_step.stepsize``.
409
+ If this attribute exists, then `basinhopping` will adjust
410
+ ``take_step.stepsize`` in order to try to optimize the global minimum
411
+ search.
412
+ accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=f_old, x_old=x_old)``, optional
413
+ Define a test which will be used to judge whether to accept the
414
+ step. This will be used in addition to the Metropolis test based on
415
+ "temperature" `T`. The acceptable return values are True,
416
+ False, or ``"force accept"``. If any of the tests return False
417
+ then the step is rejected. If any test returns ``"force accept"``, it overrides any
418
+ other tests in order to accept the step. This can be used, for example,
419
+ to forcefully escape from a local minimum that `basinhopping` is
420
+ trapped in.
421
+ callback : callable, ``callback(x, f, accept)``, optional
422
+ A callback function which will be called for all minima found. ``x``
423
+ and ``f`` are the coordinates and function value of the trial minimum,
424
+ and ``accept`` is whether that minimum was accepted. This can
425
+ be used, for example, to save the lowest N minima found. Also,
426
+ `callback` can be used to specify a user defined stop criterion by
427
+ optionally returning True to stop the `basinhopping` routine.
428
+ interval : integer, optional
429
+ interval for how often to update the `stepsize`
430
+ disp : bool, optional
431
+ Set to True to print status messages
432
+ niter_success : integer, optional
433
+ Stop the run if the global minimum candidate remains the same for this
434
+ number of iterations.
435
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
436
+
437
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
438
+ singleton is used.
439
+ If `seed` is an int, a new ``RandomState`` instance is used,
440
+ seeded with `seed`.
441
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
442
+ that instance is used.
443
+ Specify `seed` for repeatable minimizations. The random numbers
444
+ generated with this seed only affect the default Metropolis
445
+ `accept_test` and the default `take_step`. If you supply your own
446
+ `take_step` and `accept_test`, and these functions use random
447
+ number generation, then those functions are responsible for the state
448
+ of their random number generator.
449
+ target_accept_rate : float, optional
450
+ The target acceptance rate that is used to adjust the `stepsize`.
451
+ If the current acceptance rate is greater than the target,
452
+ then the `stepsize` is increased. Otherwise, it is decreased.
453
+ Range is (0, 1). Default is 0.5.
454
+
455
+ .. versionadded:: 1.8.0
456
+
457
+ stepwise_factor : float, optional
458
+ The `stepsize` is multiplied or divided by this stepwise factor upon
459
+ each update. Range is (0, 1). Default is 0.9.
460
+
461
+ .. versionadded:: 1.8.0
462
+
463
+ Returns
464
+ -------
465
+ res : OptimizeResult
466
+ The optimization result represented as a `OptimizeResult` object.
467
+ Important attributes are: ``x`` the solution array, ``fun`` the value
468
+ of the function at the solution, and ``message`` which describes the
469
+ cause of the termination. The ``OptimizeResult`` object returned by the
470
+ selected minimizer at the lowest minimum is also contained within this
471
+ object and can be accessed through the ``lowest_optimization_result``
472
+ attribute. See `OptimizeResult` for a description of other attributes.
473
+
474
+ See Also
475
+ --------
476
+ minimize :
477
+ The local minimization function called once for each basinhopping step.
478
+ `minimizer_kwargs` is passed to this routine.
479
+
480
+ Notes
481
+ -----
482
+ Basin-hopping is a stochastic algorithm which attempts to find the global
483
+ minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
484
+ [4]_. The algorithm in its current form was described by David Wales and
485
+ Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.
486
+
487
+ The algorithm is iterative with each cycle composed of the following
488
+ features
489
+
490
+ 1) random perturbation of the coordinates
491
+
492
+ 2) local minimization
493
+
494
+ 3) accept or reject the new coordinates based on the minimized function
495
+ value
496
+
497
+ The acceptance test used here is the Metropolis criterion of standard Monte
498
+ Carlo algorithms, although there are many other possibilities [3]_.
499
+
500
+ This global minimization method has been shown to be extremely efficient
501
+ for a wide variety of problems in physics and chemistry. It is
502
+ particularly useful when the function has many minima separated by large
503
+ barriers. See the `Cambridge Cluster Database
504
+ <https://www-wales.ch.cam.ac.uk/CCD.html>`_ for databases of molecular
505
+ systems that have been optimized primarily using basin-hopping. This
506
+ database includes minimization problems exceeding 300 degrees of freedom.
507
+
508
+ See the free software program `GMIN <https://www-wales.ch.cam.ac.uk/GMIN>`_
509
+ for a Fortran implementation of basin-hopping. This implementation has many
510
+ variations of the procedure described above, including more
511
+ advanced step taking algorithms and alternate acceptance criterion.
512
+
513
+ For stochastic global optimization there is no way to determine if the true
514
+ global minimum has actually been found. Instead, as a consistency check,
515
+ the algorithm can be run from a number of different random starting points
516
+ to ensure the lowest minimum found in each example has converged to the
517
+ global minimum. For this reason, `basinhopping` will by default simply
518
+ run for the number of iterations `niter` and return the lowest minimum
519
+ found. It is left to the user to ensure that this is in fact the global
520
+ minimum.
521
+
522
+ Choosing `stepsize`: This is a crucial parameter in `basinhopping` and
523
+ depends on the problem being solved. The step is chosen uniformly in the
524
+ region from x0-stepsize to x0+stepsize, in each dimension. Ideally, it
525
+ should be comparable to the typical separation (in argument values) between
526
+ local minima of the function being optimized. `basinhopping` will, by
527
+ default, adjust `stepsize` to find an optimal value, but this may take
528
+ many iterations. You will get quicker results if you set a sensible
529
+ initial value for ``stepsize``.
530
+
531
+ Choosing `T`: The parameter `T` is the "temperature" used in the
532
+ Metropolis criterion. Basinhopping steps are always accepted if
533
+ ``func(xnew) < func(xold)``. Otherwise, they are accepted with
534
+ probability::
535
+
536
+ exp( -(func(xnew) - func(xold)) / T )
537
+
538
+ So, for best results, `T` should be comparable to the typical
539
+ difference (in function values) between local minima. (The height of
540
+ "walls" between local minima is irrelevant.)
541
+
542
+ If `T` is 0, the algorithm becomes Monotonic Basin-Hopping, in which all
543
+ steps that increase energy are rejected.
544
+
545
+ .. versionadded:: 0.12.0
546
+
547
+ References
548
+ ----------
549
+ .. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
550
+ Cambridge, UK.
551
+ .. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
552
+ the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
553
+ 110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
554
+ .. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
555
+ multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
556
+ 1987, 84, 6611.
557
+ .. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
558
+ crystals, and biomolecules, Science, 1999, 285, 1368.
559
+ .. [5] Olson, B., Hashmi, I., Molloy, K., and Shehu, A., Basin Hopping as
560
+ a General and Versatile Optimization Framework for the Characterization
561
+ of Biological Macromolecules, Advances in Artificial Intelligence,
562
+ Volume 2012 (2012), Article ID 674832, :doi:`10.1155/2012/674832`
563
+
564
+ Examples
565
+ --------
566
+ The following example is a 1-D minimization problem, with many
567
+ local minima superimposed on a parabola.
568
+
569
+ >>> import numpy as np
570
+ >>> from scipy.optimize import basinhopping
571
+ >>> func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x
572
+ >>> x0 = [1.]
573
+
574
+ Basinhopping, internally, uses a local minimization algorithm. We will use
575
+ the parameter `minimizer_kwargs` to tell basinhopping which algorithm to
576
+ use and how to set up that minimizer. This parameter will be passed to
577
+ `scipy.optimize.minimize`.
578
+
579
+ >>> minimizer_kwargs = {"method": "BFGS"}
580
+ >>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
581
+ ... niter=200)
582
+ >>> # the global minimum is:
583
+ >>> ret.x, ret.fun
584
+ -0.1951, -1.0009
585
+
586
+ Next consider a 2-D minimization problem. This time we will also use
587
+ gradient information to significantly speed up the search.
588
+
589
+ >>> def func2d(x):
590
+ ... f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
591
+ ... 0.2) * x[0]
592
+ ... df = np.zeros(2)
593
+ ... df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
594
+ ... df[1] = 2. * x[1] + 0.2
595
+ ... return f, df
596
+
597
+ We'll also use a different local minimization algorithm, and we must tell
598
+ the minimizer that our function returns both energy and gradient (Jacobian).
599
+
600
+ >>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
601
+ >>> x0 = [1.0, 1.0]
602
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
603
+ ... niter=200)
604
+ >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
605
+ ... ret.x[1],
606
+ ... ret.fun))
607
+ global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109
608
+
609
+ Here is an example using a custom step-taking routine. Imagine you want
610
+ the first coordinate to take larger steps than the rest of the coordinates.
611
+ This can be implemented like so:
612
+
613
+ >>> class MyTakeStep:
614
+ ... def __init__(self, stepsize=0.5):
615
+ ... self.stepsize = stepsize
616
+ ... self.rng = np.random.default_rng()
617
+ ... def __call__(self, x):
618
+ ... s = self.stepsize
619
+ ... x[0] += self.rng.uniform(-2.*s, 2.*s)
620
+ ... x[1:] += self.rng.uniform(-s, s, x[1:].shape)
621
+ ... return x
622
+
623
+ Since ``MyTakeStep.stepsize`` exists, basinhopping will adjust the magnitude
624
+ of `stepsize` to optimize the search. We'll use the same 2-D function as
625
+ before
626
+
627
+ >>> mytakestep = MyTakeStep()
628
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
629
+ ... niter=200, take_step=mytakestep)
630
+ >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
631
+ ... ret.x[1],
632
+ ... ret.fun))
633
+ global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109
634
+
635
+ Now, let's do an example using a custom callback function which prints the
636
+ value of every minimum found
637
+
638
+ >>> def print_fun(x, f, accepted):
639
+ ... print("at minimum %.4f accepted %d" % (f, int(accepted)))
640
+
641
+ We'll run it for only 10 basinhopping steps this time.
642
+
643
+ >>> rng = np.random.default_rng()
644
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
645
+ ... niter=10, callback=print_fun, seed=rng)
646
+ at minimum 0.4159 accepted 1
647
+ at minimum -0.4317 accepted 1
648
+ at minimum -1.0109 accepted 1
649
+ at minimum -0.9073 accepted 1
650
+ at minimum -0.4317 accepted 0
651
+ at minimum -0.1021 accepted 1
652
+ at minimum -0.7425 accepted 1
653
+ at minimum -0.9073 accepted 1
654
+ at minimum -0.4317 accepted 0
655
+ at minimum -0.7425 accepted 1
656
+ at minimum -0.9073 accepted 1
657
+
658
+ The minimum at -1.0109 is actually the global minimum, found here within
659
+ the first few iterations.
660
+
661
+ """ # numpy/numpydoc#87 # noqa: E501
662
+ if target_accept_rate <= 0. or target_accept_rate >= 1.:
663
+ raise ValueError('target_accept_rate has to be in range (0, 1)')
664
+ if stepwise_factor <= 0. or stepwise_factor >= 1.:
665
+ raise ValueError('stepwise_factor has to be in range (0, 1)')
666
+
667
+ x0 = np.array(x0)
668
+
669
+ # set up the np.random generator
670
+ rng = check_random_state(seed)
671
+
672
+ # set up minimizer
673
+ if minimizer_kwargs is None:
674
+ minimizer_kwargs = dict()
675
+ wrapped_minimizer = MinimizerWrapper(scipy.optimize.minimize, func,
676
+ **minimizer_kwargs)
677
+
678
+ # set up step-taking algorithm
679
+ if take_step is not None:
680
+ if not callable(take_step):
681
+ raise TypeError("take_step must be callable")
682
+ # if take_step.stepsize exists then use AdaptiveStepsize to control
683
+ # take_step.stepsize
684
+ if hasattr(take_step, "stepsize"):
685
+ take_step_wrapped = AdaptiveStepsize(
686
+ take_step, interval=interval,
687
+ accept_rate=target_accept_rate,
688
+ factor=stepwise_factor,
689
+ verbose=disp)
690
+ else:
691
+ take_step_wrapped = take_step
692
+ else:
693
+ # use default
694
+ displace = RandomDisplacement(stepsize=stepsize, random_gen=rng)
695
+ take_step_wrapped = AdaptiveStepsize(displace, interval=interval,
696
+ accept_rate=target_accept_rate,
697
+ factor=stepwise_factor,
698
+ verbose=disp)
699
+
700
+ # set up accept tests
701
+ accept_tests = []
702
+ if accept_test is not None:
703
+ if not callable(accept_test):
704
+ raise TypeError("accept_test must be callable")
705
+ accept_tests = [accept_test]
706
+
707
+ # use default
708
+ metropolis = Metropolis(T, random_gen=rng)
709
+ accept_tests.append(metropolis)
710
+
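+ # `count` below tracks iterations since the last new global minimum;
+ # with the default of niter + 2, `count > niter_success` can never hold,
+ # so the early-exit check is effectively disabled.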
711
+ if niter_success is None:
712
+ niter_success = niter + 2
713
+
714
+ bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
715
+ accept_tests, disp=disp)
716
+
717
+ # The wrapped minimizer is called once during construction of
718
+ # BasinHoppingRunner, so run the callback
719
+ if callable(callback):
720
+ callback(bh.storage.minres.x, bh.storage.minres.fun, True)
721
+
722
+ # start main iteration loop
723
+ count, i = 0, 0
724
+ message = ["requested number of basinhopping iterations completed"
725
+ " successfully"]
726
+ for i in range(niter):
727
+ new_global_min = bh.one_cycle()
728
+
729
+ if callable(callback):
730
+ # should we pass a copy of x?
731
+ val = callback(bh.xtrial, bh.energy_trial, bh.accept)
732
+ if val is not None:
733
+ if val:
734
+ message = ["callback function requested stop early by"
735
+ "returning True"]
736
+ break
737
+
738
+ count += 1
739
+ if new_global_min:
740
+ count = 0
741
+ elif count > niter_success:
742
+ message = ["success condition satisfied"]
743
+ break
744
+
745
+ # prepare return object
746
+ res = bh.res
747
+ res.lowest_optimization_result = bh.storage.get_lowest()
748
+ res.x = np.copy(res.lowest_optimization_result.x)
749
+ res.fun = res.lowest_optimization_result.fun
750
+ res.message = message
751
+ res.nit = i + 1
752
+ res.success = res.lowest_optimization_result.success
753
+ return res
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py ADDED
@@ -0,0 +1,666 @@
1
+ import numpy as np
2
+ import scipy._lib._elementwise_iterative_method as eim
3
+ from scipy._lib._util import _RichResult
4
+
5
+ _ELIMITS = -1 # used in _bracket_root and _bracket_minimum
6
+ _ESTOPONESIDE = 2 # used in _bracket_root
7
+
8
+ def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter):
9
+
10
+ if not callable(func):
11
+ raise ValueError('`func` must be callable.')
12
+
13
+ if not np.iterable(args):
14
+ args = (args,)
15
+
16
+ xl0 = np.asarray(xl0)[()]
17
+ if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
18
+ raise ValueError('`xl0` must be numeric and real.')
19
+
20
+ xr0 = xl0 + 1 if xr0 is None else xr0
21
+ xmin = -np.inf if xmin is None else xmin
22
+ xmax = np.inf if xmax is None else xmax
23
+ factor = 2. if factor is None else factor
24
+ xl0, xr0, xmin, xmax, factor = np.broadcast_arrays(xl0, xr0, xmin, xmax, factor)
25
+
26
+ if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
27
+ raise ValueError('`xr0` must be numeric and real.')
28
+
29
+ if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
30
+ raise ValueError('`xmin` must be numeric and real.')
31
+
32
+ if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
33
+ raise ValueError('`xmax` must be numeric and real.')
34
+
35
+ if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
36
+ raise ValueError('`factor` must be numeric and real.')
37
+ if not np.all(factor > 1):
38
+ raise ValueError('All elements of `factor` must be greater than 1.')
39
+
40
+ maxiter = np.asarray(maxiter)
41
+ message = '`maxiter` must be a non-negative integer.'
42
+ if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
43
+ or np.iscomplex(maxiter)):
44
+ raise ValueError(message)
45
+ maxiter_int = int(maxiter[()])
46
+ if not maxiter == maxiter_int or maxiter < 0:
47
+ raise ValueError(message)
48
+
49
+ return func, xl0, xr0, xmin, xmax, factor, args, maxiter
50
+
51
+
52
+ def _bracket_root(func, xl0, xr0=None, *, xmin=None, xmax=None, factor=None,
53
+ args=(), maxiter=1000):
54
+ """Bracket the root of a monotonic scalar function of one variable
55
+
56
+ This function works elementwise when `xl0`, `xr0`, `xmin`, `xmax`, `factor`, and
57
+ the elements of `args` are broadcastable arrays.
58
+
59
+ Parameters
60
+ ----------
61
+ func : callable
62
+ The function for which the root is to be bracketed.
63
+ The signature must be::
64
+
65
+ func(x: ndarray, *args) -> ndarray
66
+
67
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
68
+ which may contain an arbitrary number of arrays that are broadcastable
69
+ with `x`. ``func`` must be an elementwise function: each element
70
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
71
+ xl0, xr0 : float array_like
72
+ Starting guess of bracket, which need not contain a root. If `xr0` is
73
+ not provided, ``xr0 = xl0 + 1``. Must be broadcastable with one another.
74
+ xmin, xmax : float array_like, optional
75
+ Minimum and maximum allowable endpoints of the bracket, inclusive. Must
76
+ be broadcastable with `xl0` and `xr0`.
77
+ factor : float array_like, default: 2
78
+ The factor used to grow the bracket. See notes for details.
79
+ args : tuple, optional
80
+ Additional positional arguments to be passed to `func`. Must be arrays
81
+ broadcastable with `xl0`, `xr0`, `xmin`, and `xmax`. If the callable to be
82
+ bracketed requires arguments that are not broadcastable with these
83
+ arrays, wrap that callable with `func` such that `func` accepts
84
+ only `x` and broadcastable arrays.
85
+ maxiter : int, optional
86
+ The maximum number of iterations of the algorithm to perform.
87
+
88
+ Returns
89
+ -------
90
+ res : _RichResult
91
+ An instance of `scipy._lib._util._RichResult` with the following
92
+ attributes. The descriptions are written as though the values will be
93
+ scalars; however, if `func` returns an array, the outputs will be
94
+ arrays of the same shape.
95
+
96
+ xl, xr : float
97
+ The lower and upper ends of the bracket, if the algorithm
98
+ terminated successfully.
99
+ fl, fr : float
100
+ The function value at the lower and upper ends of the bracket.
101
+ nfev : int
102
+ The number of function evaluations required to find the bracket.
103
+ This is distinct from the number of times `func` is *called*
104
+ because the function may evaluated at multiple points in a single
105
+ call.
106
+ nit : int
107
+ The number of iterations of the algorithm that were performed.
108
+ status : int
109
+ An integer representing the exit status of the algorithm.
110
+
111
+ - ``0`` : The algorithm produced a valid bracket.
112
+ - ``-1`` : The bracket expanded to the allowable limits without finding a sign change.
113
+ - ``-2`` : The maximum number of iterations was reached.
114
+ - ``-3`` : A non-finite value was encountered.
115
+ - ``-4`` : Iteration was terminated by `callback`.
116
+ - ``-5`` : The initial bracket does not satisfy ``xmin <= xl0 < xr0 <= xmax``.
117
+ - ``1`` : The algorithm is proceeding normally (in `callback` only).
118
+ - ``2`` : A bracket was found in the opposite search direction (in `callback` only).
119
+
120
+ success : bool
121
+ ``True`` when the algorithm terminated successfully (status ``0``).
122
+
123
+ Notes
124
+ -----
125
+ This function generalizes an algorithm found in pieces throughout
126
+ `scipy.stats`. The strategy is to iteratively grow the bracket `(l, r)`
127
+ until ``func(l)`` and ``func(r)`` differ in sign. The bracket grows to the left as follows.
128
+
129
+ - If `xmin` is not provided, the distance between `xl0` and `l` is iteratively
130
+ increased by `factor`.
131
+ - If `xmin` is provided, the distance between `xmin` and `l` is iteratively
132
+ decreased by `factor`. Note that this also *increases* the bracket size.
133
+
134
+ Growth of the bracket to the right is analogous.
135
+
136
+ Growth of the bracket in one direction stops when the endpoint is no longer
137
+ finite, the function value at the endpoint is no longer finite, or the
138
+ endpoint reaches its limiting value (`xmin` or `xmax`). Iteration terminates
139
+ when the bracket stops growing in both directions, the bracket surrounds
140
+ the root, or a root is found (accidentally).
141
+
142
+ If two brackets are found (that is, a bracket is found on both sides in
143
+ the same iteration), the smaller of the two is returned.
144
+ If roots of the function are found, both `l` and `r` are set to the
145
+ leftmost root.
146
+
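+ As a schematic illustration (not a doctest; `_bracket_root` is a private
+ helper, and exact outputs may vary)::
+
+     from scipy.optimize._bracket import _bracket_root
+     # f is increasing with a root at 1.5; the initial interval [0, 1]
+     # does not contain the root, so the bracket must grow rightward.
+     res = _bracket_root(lambda x: x - 1.5, xl0=0.0, xr0=1.0)
+     # on success, the returned endpoints straddle the root
+     assert res.success and res.xl <= 1.5 <= res.xr
+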
147
+ """ # noqa: E501
148
+ # Todo:
149
+ # - find bracket with sign change in specified direction
150
+ # - Add tolerance
151
+ # - allow factor < 1?
152
+
153
+ callback = None # supported by the machinery below, but deliberately disabled and untested
154
+ temp = _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter)
155
+ func, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
156
+
157
+ xs = (xl0, xr0)
158
+ temp = eim._initialize(func, xs, args)
159
+ func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
160
+ xl0, xr0 = xs
161
+ xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
162
+ xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
163
+ invalid_bracket = ~((xmin <= xl0) & (xl0 < xr0) & (xr0 <= xmax))
164
+
165
+ # The approach is to treat the left and right searches as though they were
166
+ # (almost) totally independent one-sided bracket searches. (The interaction
167
+ # is considered when checking for termination and preparing the result
168
+ # object.)
169
+ # `x` is the "moving" end of the bracket
170
+ x = np.concatenate(xs)
171
+ f = np.concatenate(fs)
172
+ invalid_bracket = np.concatenate((invalid_bracket, invalid_bracket))
173
+ n = len(x) // 2
174
+
175
+ # `x_last` is the previous location of the moving end of the bracket. If
176
+ # the signs of `f` and `f_last` are different, `x` and `x_last` form a
177
+ # bracket.
178
+ x_last = np.concatenate((x[n:], x[:n]))
179
+ f_last = np.concatenate((f[n:], f[:n]))
180
+ # `x0` is the "fixed" end of the bracket.
181
+ x0 = x_last
182
+ # We don't need to retain the corresponding function value, since the
183
+ # fixed end of the bracket is only needed to compute the new value of the
184
+ # moving end; it is never returned.
185
+ limit = np.concatenate((xmin, xmax))
186
+
187
+ factor = np.broadcast_to(factor, shape).astype(dtype, copy=False).ravel()
188
+ factor = np.concatenate((factor, factor))
189
+
190
+ active = np.arange(2*n)
191
+ args = [np.concatenate((arg, arg)) for arg in args]
192
+
193
+ # This is needed due to inner workings of `eim._loop`.
194
+ # We're abusing it a tiny bit.
195
+ shape = shape + (2,)
196
+
197
+ # `d` is for "distance".
198
+ # For searches without a limit, the distance between the fixed end of the
199
+ # bracket `x0` and the moving end `x` will grow by `factor` each iteration.
200
+ # For searches with a limit, the distance between the `limit` and moving
201
+ # end of the bracket `x` will shrink by `factor` each iteration.
202
+ i = np.isinf(limit)
203
+ ni = ~i
204
+ d = np.zeros_like(x)
205
+ d[i] = x[i] - x0[i]
206
+ d[ni] = limit[ni] - x[ni]
207
+
208
+ status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
209
+ status[invalid_bracket] = eim._EINPUTERR
210
+ nit, nfev = 0, 1 # one function evaluation per side performed above
211
+
212
+ work = _RichResult(x=x, x0=x0, f=f, limit=limit, factor=factor,
213
+ active=active, d=d, x_last=x_last, f_last=f_last,
214
+ nit=nit, nfev=nfev, status=status, args=args,
215
+ xl=None, xr=None, fl=None, fr=None, n=n)
216
+ res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xr', 'xr'),
217
+ ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'),
218
+ ('fr', 'fr'), ('x', 'x'), ('f', 'f'),
219
+ ('x_last', 'x_last'), ('f_last', 'f_last')]
220
+
221
+ def pre_func_eval(work):
222
+ # Initialize moving end of bracket
223
+ x = np.zeros_like(work.x)
224
+
225
+ # Unlimited brackets grow by `factor` by increasing distance from fixed
226
+ # end to moving end.
227
+ i = np.isinf(work.limit) # indices of unlimited brackets
228
+ work.d[i] *= work.factor[i]
229
+ x[i] = work.x0[i] + work.d[i]
230
+
231
+ # Limited brackets grow by decreasing the distance from the limit to
232
+ # the moving end.
233
+ ni = ~i # indices of limited brackets
234
+ work.d[ni] /= work.factor[ni]
235
+ x[ni] = work.limit[ni] - work.d[ni]
236
+
237
+ return x
238
+
239
+ def post_func_eval(x, f, work):
240
+ # Keep track of the previous location of the moving end so that we can
241
+ # return a narrower bracket. (The alternative is to remember the
242
+ # original fixed end, but then the bracket would be wider than needed.)
243
+ work.x_last = work.x
244
+ work.f_last = work.f
245
+ work.x = x
246
+ work.f = f
247
+
248
+ def check_termination(work):
249
+ # Condition 0: initial bracket is invalid
250
+ stop = (work.status == eim._EINPUTERR)
251
+
252
+ # Condition 1: a valid bracket (or the root itself) has been found
253
+ sf = np.sign(work.f)
254
+ sf_last = np.sign(work.f_last)
255
+ i = ((sf_last == -sf) | (sf_last == 0) | (sf == 0)) & ~stop
256
+ work.status[i] = eim._ECONVERGED
257
+ stop[i] = True
258
+
259
+ # Condition 2: the other side's search found a valid bracket.
260
+ # (If we just found a bracket with the rightward search, we can stop
261
+ # the leftward search, and vice-versa.)
262
+ # To do this, we need to set the status of the other side's search;
263
+ # this is tricky because `work.status` contains only the *active*
264
+ # elements, so we don't immediately know the index of the element we
265
+ # need to set - or even if it's still there. (That search may have
266
+ # terminated already, e.g. by reaching its `limit`.)
267
+ # To facilitate this, `work.active` contains a unique integer index of
268
+ # each search. Indices `k` (`k < n`) and `k + n` correspond with a
269
+ # leftward and rightward search, respectively. Elements are removed
270
+ # from `work.active` just as they are removed from `work.status`, so
271
+ # we use `work.active` to help find the right location in
272
+ # `work.status`.
273
+ # Get the integer indices of the elements that can also stop
274
+ also_stop = (work.active[i] + work.n) % (2*work.n)
275
+ # Check whether they are still active.
276
+ # To start, we need to find out where in `work.active` they would
277
+ # appear if they are indeed there.
278
+ j = np.searchsorted(work.active, also_stop)
279
+ # If the location exceeds the length of `work.active`, they are
280
+ # not there.
281
+ j = j[j < len(work.active)]
282
+ # Check whether they are still there.
283
+ j = j[also_stop == work.active[j]]
284
+ # Now convert these to boolean indices to use with `work.status`.
285
+ i = np.zeros_like(stop)
286
+ i[j] = True # boolean indices of elements that can also stop
287
+ i = i & ~stop
288
+ work.status[i] = _ESTOPONESIDE
289
+ stop[i] = True
290
+
291
+ # Condition 3: moving end of bracket reaches limit
292
+ i = (work.x == work.limit) & ~stop
293
+ work.status[i] = _ELIMITS
294
+ stop[i] = True
295
+
296
+ # Condition 4: non-finite value encountered
297
+ i = ~(np.isfinite(work.x) & np.isfinite(work.f)) & ~stop
298
+ work.status[i] = eim._EVALUEERR
299
+ stop[i] = True
300
+
301
+ return stop
302
+
303
+ def post_termination_check(work):
304
+ pass
305
+
306
+ def customize_result(res, shape):
307
+ n = len(res['x']) // 2
308
+
309
+ # To avoid ambiguity, below we refer to `xl0`, the initial left endpoint,
310
+ # as `a`, and `xr0`, the initial right endpoint, as `b`.
311
+ # Because we treat the two one-sided searches as though they were
312
+ # independent, what we keep track of in `work` and what we want to
313
+ # return in `res` look quite different. Combine the results from the
314
+ # two one-sided searches before reporting the results to the user.
315
+ # - "a" refers to the leftward search (the moving end started at `a`)
316
+ # - "b" refers to the rightward search (the moving end started at `b`)
317
+ # - "l" refers to the left end of the bracket (closer to -oo)
318
+ # - "r" refers to the right end of the bracket (closer to +oo)
319
+ xal = res['x'][:n]
320
+ xar = res['x_last'][:n]
321
+ xbl = res['x_last'][n:]
322
+ xbr = res['x'][n:]
323
+
324
+ fal = res['f'][:n]
325
+ far = res['f_last'][:n]
326
+ fbl = res['f_last'][n:]
327
+ fbr = res['f'][n:]
328
+
329
+ # Initialize the brackets and corresponding function values to return
330
+ # to the user. Brackets may not be valid (e.g. there is no root,
331
+ # there weren't enough iterations, NaN encountered), but we still need
332
+ # to return something. One option would be all NaNs, but what I've
333
+ # chosen here is the left- and right-most points at which the function
334
+ # has been evaluated. This gives the user some information about what
335
+ # interval of the real line has been searched and shows that there is
336
+ # no sign change between the two ends.
337
+ xl = xal.copy()
338
+ fl = fal.copy()
339
+ xr = xbr.copy()
340
+ fr = fbr.copy()
341
+
342
+ # `status` indicates whether the bracket is valid or not. If so,
343
+ # we want to adjust the bracket we return to be the narrowest possible
344
+ # given the points at which we evaluated the function.
345
+ # For example if bracket "a" is valid and smaller than bracket "b" OR
346
+ # if bracket "a" is valid and bracket "b" is not valid, we want to
347
+ # return bracket "a" (and vice versa).
348
+ sa = res['status'][:n]
349
+ sb = res['status'][n:]
350
+
351
+ da = xar - xal
352
+ db = xbr - xbl
353
+
354
+ i1 = ((da <= db) & (sa == 0)) | ((sa == 0) & (sb != 0))
355
+ i2 = ((db <= da) & (sb == 0)) | ((sb == 0) & (sa != 0))
356
+
357
+ xr[i1] = xar[i1]
358
+ fr[i1] = far[i1]
359
+ xl[i2] = xbl[i2]
360
+ fl[i2] = fbl[i2]
361
+
362
+ # Finish assembling the result object
363
+ res['xl'] = xl
364
+ res['xr'] = xr
365
+ res['fl'] = fl
366
+ res['fr'] = fr
367
+
368
+ res['nit'] = np.maximum(res['nit'][:n], res['nit'][n:])
369
+ res['nfev'] = res['nfev'][:n] + res['nfev'][n:]
370
+ # If the status on one side is zero, the status is zero. In any case,
371
+ # report the status from one side only.
372
+ res['status'] = np.choose(sa == 0, (sb, sa))
373
+ res['success'] = (res['status'] == 0)
374
+
375
+ del res['x']
376
+ del res['f']
377
+ del res['x_last']
378
+ del res['f_last']
379
+
380
+ return shape[:-1]
381
+
382
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
383
+ pre_func_eval, post_func_eval, check_termination,
384
+ post_termination_check, customize_result, res_work_pairs,
385
+ xp)
386
+
387
+
388
+ def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter):
389
+
390
+ if not callable(func):
391
+ raise ValueError('`func` must be callable.')
392
+
393
+ if not np.iterable(args):
394
+ args = (args,)
395
+
396
+ xm0 = np.asarray(xm0)[()]
397
+ if not np.issubdtype(xm0.dtype, np.number) or np.iscomplex(xm0).any():
398
+ raise ValueError('`xm0` must be numeric and real.')
399
+
400
+ xmin = -np.inf if xmin is None else xmin
401
+ xmax = np.inf if xmax is None else xmax
402
+
403
+ # If xl0 (xr0) is not supplied, fill with a dummy value for the sake
404
+ # of broadcasting. We need to wait until xmin (xmax) has been validated
405
+ # to compute the default values.
406
+ xl0_not_supplied = False
407
+ if xl0 is None:
408
+ xl0 = np.nan
409
+ xl0_not_supplied = True
410
+
411
+ xr0_not_supplied = False
412
+ if xr0 is None:
413
+ xr0 = np.nan
414
+ xr0_not_supplied = True
415
+
416
+ factor = 2.0 if factor is None else factor
417
+ xl0, xm0, xr0, xmin, xmax, factor = np.broadcast_arrays(
418
+ xl0, xm0, xr0, xmin, xmax, factor
419
+ )
420
+
421
+ if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
422
+ raise ValueError('`xl0` must be numeric and real.')
423
+
424
+ if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
425
+ raise ValueError('`xr0` must be numeric and real.')
426
+
427
+ if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
428
+ raise ValueError('`xmin` must be numeric and real.')
429
+
430
+ if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
431
+ raise ValueError('`xmax` must be numeric and real.')
432
+
433
+ if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
434
+ raise ValueError('`factor` must be numeric and real.')
435
+ if not np.all(factor > 1):
436
+ raise ValueError('All elements of `factor` must be greater than 1.')
437
+
438
+ # Calculate default values of xl0 and/or xr0 if they have not been supplied
439
+ # by the user. We need to be careful to ensure xl0 and xr0 are not outside
440
+ # of (xmin, xmax).
441
+ if xl0_not_supplied:
442
+ xl0 = xm0 - np.minimum((xm0 - xmin)/16, 0.5)
443
+ if xr0_not_supplied:
444
+ xr0 = xm0 + np.minimum((xmax - xm0)/16, 0.5)
445
+
446
+ maxiter = np.asarray(maxiter)
447
+ message = '`maxiter` must be a non-negative integer.'
448
+ if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
449
+ or np.iscomplex(maxiter)):
450
+ raise ValueError(message)
451
+ maxiter_int = int(maxiter[()])
452
+ if not maxiter == maxiter_int or maxiter < 0:
453
+ raise ValueError(message)
454
+
455
+ return func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter
456
+
457
+
458
+ def _bracket_minimum(func, xm0, *, xl0=None, xr0=None, xmin=None, xmax=None,
459
+ factor=None, args=(), maxiter=1000):
460
+ """Bracket the minimum of a unimodal scalar function of one variable
461
+
462
+ This function works elementwise when `xm0`, `xl0`, `xr0`, `xmin`, `xmax`,
463
+ and the elements of `args` are broadcastable arrays.
464
+
465
+ Parameters
466
+ ----------
467
+ func : callable
468
+ The function for which the minimum is to be bracketed.
469
+ The signature must be::
470
+
471
+ func(x: ndarray, *args) -> ndarray
472
+
473
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
474
+ which may contain an arbitrary number of arrays that are broadcastable
475
+ with ``x``. `func` must be an elementwise function: each element
476
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices `i`.
477
+ xm0 : float array_like
478
+ Starting guess for middle point of bracket.
479
+ xl0, xr0 : float array_like, optional
480
+ Starting guesses for left and right endpoints of the bracket. Must be
481
+ broadcastable with one another and with `xm0`.
482
+ xmin, xmax : float array_like, optional
483
+ Minimum and maximum allowable endpoints of the bracket, inclusive. Must
484
+ be broadcastable with `xl0`, `xm0`, and `xr0`.
485
+ factor : float array_like, optional
486
+ Controls expansion of bracket endpoint in downhill direction. Works
487
+ differently in the cases where a limit is set in the downhill direction
488
+ with `xmax` or `xmin`. See Notes.
489
+ args : tuple, optional
490
+ Additional positional arguments to be passed to `func`. Must be arrays
491
+ broadcastable with `xl0`, `xm0`, `xr0`, `xmin`, and `xmax`. If the
492
+ callable to be bracketed requires arguments that are not broadcastable
493
+ with these arrays, wrap that callable with `func` such that `func`
494
+ accepts only ``x`` and broadcastable arrays.
495
+ maxiter : int, optional
496
+ The maximum number of iterations of the algorithm to perform. The number
497
+ of function evaluations is three greater than the number of iterations.
498
+
499
+ Returns
500
+ -------
501
+ res : _RichResult
502
+ An instance of `scipy._lib._util._RichResult` with the following
503
+ attributes. The descriptions are written as though the values will be
504
+ scalars; however, if `func` returns an array, the outputs will be
505
+ arrays of the same shape.
506
+
507
+ xl, xm, xr : float
508
+ The left, middle, and right points of the bracket, if the algorithm
509
+ terminated successfully.
510
+ fl, fm, fr : float
511
+ The function value at the left, middle, and right points of the bracket.
512
+ nfev : int
513
+ The number of function evaluations required to find the bracket.
514
+ nit : int
515
+ The number of iterations of the algorithm that were performed.
516
+ status : int
517
+ An integer representing the exit status of the algorithm.
518
+
519
+ - ``0`` : The algorithm produced a valid bracket.
520
+ - ``-1`` : The bracket expanded to the allowable limits. Assuming
521
+ unimodality, this implies the endpoint at the limit is a
522
+ minimizer.
523
+ - ``-2`` : The maximum number of iterations was reached.
524
+ - ``-3`` : A non-finite value was encountered.
525
+ - ``-4`` : Iteration was terminated by `callback` (currently unreachable, as `callback` is disabled).
526
+ - ``-5`` : The initial bracket does not satisfy
527
+ ``xmin <= xl0 < xm0 < xr0 <= xmax``.
528
+
529
+ success : bool
530
+ ``True`` when the algorithm terminated successfully (status ``0``).
531
+
532
+ Notes
533
+ -----
534
+ Similar to `scipy.optimize.bracket`, this function seeks to find real
535
+ points ``xl < xm < xr`` such that ``f(xl) >= f(xm)`` and ``f(xr) >= f(xm)``,
536
+ where at least one of the inequalities is strict. Unlike `scipy.optimize.bracket`,
537
+ this function can operate in a vectorized manner on array input, so long as
538
+ the input arrays are broadcastable with each other. Also unlike
539
+ `scipy.optimize.bracket`, users may specify minimum and maximum endpoints
540
+ for the desired bracket.
541
+
542
+ Given an initial trio of points ``xl = xl0``, ``xm = xm0``, ``xr = xr0``,
543
+ the algorithm checks if these points already give a valid bracket. If not,
544
+ a new endpoint, ``w``, is chosen in the "downhill" direction, ``xm`` becomes the new
545
+ opposite endpoint, and either `xl` or `xr` becomes the new middle point,
546
+ depending on which direction is downhill. The algorithm repeats from here.
547
+
548
+ The new endpoint `w` is chosen differently depending on whether or not a
549
+ boundary `xmin` or `xmax` has been set in the downhill direction. Without
550
+ loss of generality, suppose the downhill direction is to the right, so that
551
+ ``f(xl) > f(xm) > f(xr)``. If there is no boundary to the right, then `w`
552
+ is chosen to be ``xr + factor * (xr - xm)`` where `factor` is controlled by
553
+ the user (defaults to 2.0) so that step sizes increase in geometric proportion.
554
+ If there is a boundary, `xmax` in this case, then `w` is chosen to be
555
+ ``xmax - (xmax - xr)/factor``, with steps slowing to a stop at
556
+ `xmax`. This cautious approach ensures that a minimum near but distinct from
557
+ the boundary isn't missed while also detecting whether or not `xmax` is
558
+ a minimizer when `xmax` is reached after a finite number of steps.
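+
+ As a schematic illustration (not a doctest; `_bracket_minimum` is a
+ private helper, and exact outputs may vary)::
+
+     from scipy.optimize._bracket import _bracket_minimum
+     # quadratic with its minimum at 1.5, starting from a middle guess of 0
+     res = _bracket_minimum(lambda x: (x - 1.5)**2, 0.0)
+     # on success, the bracket satisfies f(xl) >= f(xm) <= f(xr)
+     assert res.success and res.xl < 1.5 < res.xr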
559
+ """ # noqa: E501
560
+ callback = None # supported by the machinery below, but deliberately disabled and untested
561
+
562
+ temp = _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter)
563
+ func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
564
+
565
+ xs = (xl0, xm0, xr0)
566
+ temp = eim._initialize(func, xs, args)
567
+ func, xs, fs, args, shape, dtype, xp = temp
568
+
569
+ xl0, xm0, xr0 = xs
570
+ fl0, fm0, fr0 = fs
571
+ xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
572
+ xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
573
+ invalid_bracket = ~((xmin <= xl0) & (xl0 < xm0) & (xm0 < xr0) & (xr0 <= xmax))
574
+ # We will modify factor later on so make a copy. np.broadcast_to returns
575
+ # a read-only view.
576
+ factor = np.broadcast_to(factor, shape).astype(dtype, copy=True).ravel()
577
+
578
+ # To simplify the logic, swap xl and xr if f(xl) < f(xr). We should always be
579
+ # marching downhill in the direction from xl to xr.
580
+ comp = fl0 < fr0
581
+ xl0[comp], xr0[comp] = xr0[comp], xl0[comp]
582
+ fl0[comp], fr0[comp] = fr0[comp], fl0[comp]
583
+ # We only need the boundary in the direction we're traveling.
584
+ limit = np.where(comp, xmin, xmax)
585
+
586
+ unlimited = np.isinf(limit)
587
+ limited = ~unlimited
588
+ step = np.empty_like(xl0)
589
+
590
+ step[unlimited] = (xr0[unlimited] - xm0[unlimited])
591
+ step[limited] = (limit[limited] - xr0[limited])
592
+
593
+ # Step size is divided by factor for case where there is a limit.
594
+ factor[limited] = 1 / factor[limited]
595
+
596
+ status = np.full_like(xl0, eim._EINPROGRESS, dtype=int)
597
+ status[invalid_bracket] = eim._EINPUTERR
598
+ nit, nfev = 0, 3
599
+
600
+ work = _RichResult(xl=xl0, xm=xm0, xr=xr0, xr0=xr0, fl=fl0, fm=fm0, fr=fr0,
601
+ step=step, limit=limit, limited=limited, factor=factor, nit=nit,
602
+ nfev=nfev, status=status, args=args)
603
+
604
+ res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xm', 'xm'), ('xr', 'xr'),
605
+ ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'), ('fm', 'fm'),
606
+ ('fr', 'fr')]
607
+
608
+ def pre_func_eval(work):
609
+ work.step *= work.factor
610
+ x = np.empty_like(work.xr)
611
+ x[~work.limited] = work.xr0[~work.limited] + work.step[~work.limited]
612
+ x[work.limited] = work.limit[work.limited] - work.step[work.limited]
613
+ # Since the new bracket endpoint is calculated from an offset with the
614
+ # limit, it may be the case that the new endpoint equals the old endpoint,
615
+ # when the old endpoint is sufficiently close to the limit. We use the
616
+ # limit itself as the new endpoint in these cases.
617
+ x[work.limited] = np.where(
618
+ x[work.limited] == work.xr[work.limited],
619
+ work.limit[work.limited],
620
+ x[work.limited],
621
+ )
622
+ return x
623
+
624
+ def post_func_eval(x, f, work):
625
+ work.xl, work.xm, work.xr = work.xm, work.xr, x
626
+ work.fl, work.fm, work.fr = work.fm, work.fr, f
627
+
628
+ def check_termination(work):
629
+ # Condition 0: Initial bracket is invalid.
630
+ stop = (work.status == eim._EINPUTERR)
631
+
632
+ # Condition 1: A valid bracket has been found.
633
+ i = (
634
+ (work.fl >= work.fm) & (work.fr > work.fm)
635
+ | (work.fl > work.fm) & (work.fr >= work.fm)
636
+ ) & ~stop
637
+ work.status[i] = eim._ECONVERGED
638
+ stop[i] = True
639
+
640
+ # Condition 2: Moving end of bracket reaches limit.
641
+ i = (work.xr == work.limit) & ~stop
642
+ work.status[i] = _ELIMITS
643
+ stop[i] = True
644
+
645
+ # Condition 3: non-finite value encountered
646
+ i = ~(np.isfinite(work.xr) & np.isfinite(work.fr)) & ~stop
647
+ work.status[i] = eim._EVALUEERR
648
+ stop[i] = True
649
+
650
+ return stop
651
+
652
+ def post_termination_check(work):
653
+ pass
654
+
655
+ def customize_result(res, shape):
656
+ # Reorder entries of xl and xr if they were swapped due to f(xl0) < f(xr0).
657
+ comp = res['xl'] > res['xr']
658
+ res['xl'][comp], res['xr'][comp] = res['xr'][comp], res['xl'][comp]
659
+ res['fl'][comp], res['fr'][comp] = res['fr'][comp], res['fl'][comp]
660
+ return shape
661
+
662
+ return eim._loop(work, callback, shape,
663
+ maxiter, func, args, dtype,
664
+ pre_func_eval, post_func_eval,
665
+ check_termination, post_termination_check,
666
+ customize_result, res_work_pairs, xp)
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py ADDED
@@ -0,0 +1,549 @@
1
+ import math
2
+ import numpy as np
3
+ import scipy._lib._elementwise_iterative_method as eim
4
+ from scipy._lib._util import _RichResult
5
+ from scipy._lib._array_api import xp_clip, xp_minimum, xp_sign
6
+
7
+ # TODO:
8
+ # - (maybe?) don't use fancy indexing assignment
9
+ # - figure out how to replace the new `try`/`except`s
10
+
11
+
12
+ def _chandrupatla(func, a, b, *, args=(), xatol=None, xrtol=None,
13
+ fatol=None, frtol=0, maxiter=None, callback=None):
14
+ """Find the root of an elementwise function using Chandrupatla's algorithm.
15
+
16
+ For each element of the output of `func`, `_chandrupatla` seeks the scalar
17
+ root that makes the element 0. This function allows for `a`, `b`, and the
18
+ output of `func` to be of any broadcastable shapes.
19
+
20
+ Parameters
21
+ ----------
22
+ func : callable
23
+ The function whose root is desired. The signature must be::
24
+
25
+ func(x: ndarray, *args) -> ndarray
26
+
27
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
28
+ which may contain an arbitrary number of components of any type(s).
29
+ ``func`` must be an elementwise function: each element ``func(x)[i]``
30
+ must equal ``func(x[i])`` for all indices ``i``. `_chandrupatla`
31
+ seeks an array ``x`` such that ``func(x)`` is an array of zeros.
32
+ a, b : array_like
33
+ The lower and upper bounds of the root of the function. Must be
34
+ broadcastable with one another.
35
+ args : tuple, optional
36
+ Additional positional arguments to be passed to `func`.
37
+ xatol, xrtol, fatol, frtol : float, optional
38
+ Absolute and relative tolerances on the root and function value.
39
+ See Notes for details.
40
+ maxiter : int, optional
41
+ The maximum number of iterations of the algorithm to perform.
42
+ The default is the maximum possible number of bisections within
43
+ the (normal) floating point numbers of the relevant dtype.
44
+ callback : callable, optional
45
+ An optional user-supplied function to be called before the first
46
+ iteration and after each iteration.
47
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
48
+ similar to that returned by `_chandrupatla` (but containing the current
49
+ iterate's values of all variables). If `callback` raises a
50
+ ``StopIteration``, the algorithm will terminate immediately and
51
+ `_chandrupatla` will return a result.
52
+
53
+ Returns
54
+ -------
55
+ res : _RichResult
56
+ An instance of `scipy._lib._util._RichResult` with the following
57
+ attributes. The descriptions are written as though the values will be
58
+ scalars; however, if `func` returns an array, the outputs will be
59
+ arrays of the same shape.
60
+
61
+ x : float
62
+ The root of the function, if the algorithm terminated successfully.
63
+ nfev : int
64
+ The number of times the function was called to find the root.
65
+ nit : int
66
+ The number of iterations of Chandrupatla's algorithm performed.
67
+ status : int
68
+ An integer representing the exit status of the algorithm.
69
+ ``0`` : The algorithm converged to the specified tolerances.
70
+ ``-1`` : The algorithm encountered an invalid bracket.
71
+ ``-2`` : The maximum number of iterations was reached.
72
+ ``-3`` : A non-finite value was encountered.
73
+ ``-4`` : Iteration was terminated by `callback`.
74
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
75
+ success : bool
76
+ ``True`` when the algorithm terminated successfully (status ``0``).
77
+ fun : float
78
+ The value of `func` evaluated at `x`.
79
+ xl, xr : float
80
+ The lower and upper ends of the bracket.
81
+ fl, fr : float
82
+ The function value at the lower and upper ends of the bracket.
83
+
84
+ Notes
85
+ -----
86
+ Implemented based on Chandrupatla's original paper [1]_.
87
+
88
+ If ``xl`` and ``xr`` are the left and right ends of the bracket,
89
+ ``xmin = xl if abs(func(xl)) <= abs(func(xr)) else xr``,
90
+ and ``fmin0 = min(abs(func(a)), abs(func(b)))``, then the algorithm is
91
+ considered to have converged when ``abs(xr - xl) < xatol + abs(xmin) * xrtol``
92
+ or ``abs(func(xmin)) <= fatol + fmin0 * frtol``. This is equivalent to the
93
+ termination condition described in [1]_ with ``xrtol = 4e-10``,
94
+ ``xatol = 1e-5``, and ``fatol = frtol = 0``. The default values are
95
+ ``xatol = 4*tiny``, ``xrtol = 4*eps``, ``frtol = 0``, and ``fatol = tiny``,
96
+ where ``eps`` and ``tiny`` are the precision and smallest normal number
97
+ of the result ``dtype`` of function inputs and outputs.
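+
+ For instance, to terminate on bracket width alone (illustrative only;
+ `f`, `a`, and `b` are hypothetical)::
+
+     res = _chandrupatla(f, a, b, xatol=1e-6, xrtol=0, fatol=0, frtol=0)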
98
+
99
+ References
100
+ ----------
101
+
102
+ .. [1] Chandrupatla, Tirupathi R.
103
+ "A new hybrid quadratic/bisection algorithm for finding the zero of a
104
+ nonlinear function without using derivatives".
105
+ Advances in Engineering Software, 28(3), 145-149.
106
+ https://doi.org/10.1016/s0965-9978(96)00051-8
107
+
108
+ See Also
109
+ --------
110
+ brentq, brenth, ridder, bisect, newton
111
+
112
+ Examples
113
+ --------
114
+ >>> from scipy import optimize
115
+ >>> def f(x, c):
116
+ ... return x**3 - 2*x - c
117
+ >>> c = 5
118
+ >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
119
+ >>> res.x
120
+ 2.0945514818937463
121
+
122
+ >>> c = [3, 4, 5]
123
+ >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
124
+ >>> res.x
125
+ array([1.8932892 , 2. , 2.09455148])
126
+
127
+ """
128
+ res = _chandrupatla_iv(func, args, xatol, xrtol,
129
+ fatol, frtol, maxiter, callback)
130
+ func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res
131
+
132
+ # Initialization
133
+ temp = eim._initialize(func, (a, b), args)
134
+ func, xs, fs, args, shape, dtype, xp = temp
135
+ x1, x2 = xs
136
+ f1, f2 = fs
137
+ status = xp.full_like(x1, eim._EINPROGRESS, dtype=xp.int32) # in progress
138
+ nit, nfev = 0, 2 # two function evaluations performed above
139
+ finfo = xp.finfo(dtype)
140
+ xatol = 4*finfo.smallest_normal if xatol is None else xatol
141
+ xrtol = 4*finfo.eps if xrtol is None else xrtol
142
+ fatol = finfo.smallest_normal if fatol is None else fatol
143
+ frtol = frtol * xp_minimum(xp.abs(f1), xp.abs(f2))
144
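+ # default maxiter: the number of halvings needed to take a bracket from
+ # the largest to the smallest normal float, beyond which bisection
+ # cannot refine the result further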
+ maxiter = (math.log2(finfo.max) - math.log2(finfo.smallest_normal)
145
+ if maxiter is None else maxiter)
146
+ work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=None, f3=None, t=0.5,
147
+ xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
148
+ nit=nit, nfev=nfev, status=status)
149
+ res_work_pairs = [('status', 'status'), ('x', 'xmin'), ('fun', 'fmin'),
150
+ ('nit', 'nit'), ('nfev', 'nfev'), ('xl', 'x1'),
151
+ ('fl', 'f1'), ('xr', 'x2'), ('fr', 'f2')]
152
+
153
+ def pre_func_eval(work):
154
+ # [1] Figure 1 (first box)
155
+ x = work.x1 + work.t * (work.x2 - work.x1)
156
+ return x
157
+
158
+ def post_func_eval(x, f, work):
159
+ # [1] Figure 1 (first diamond and boxes)
160
+ # Note: y/n are reversed in figure; compare to BASIC in appendix
161
+ work.x3, work.f3 = (xp.asarray(work.x2, copy=True),
162
+ xp.asarray(work.f2, copy=True))
163
+ j = xp.sign(f) == xp.sign(work.f1)
164
+ nj = ~j
165
+ work.x3[j], work.f3[j] = work.x1[j], work.f1[j]
166
+ work.x2[nj], work.f2[nj] = work.x1[nj], work.f1[nj]
167
+ work.x1, work.f1 = x, f
168
+
169
+ def check_termination(work):
170
+ # [1] Figure 1 (second diamond)
171
+ # Check for all terminal conditions and record statuses.
172
+
173
+ # See [1] Section 4 (first two sentences)
174
+ i = xp.abs(work.f1) < xp.abs(work.f2)
175
+ work.xmin = xp.where(i, work.x1, work.x2)
176
+ work.fmin = xp.where(i, work.f1, work.f2)
177
+ stop = xp.zeros_like(work.x1, dtype=xp.bool) # termination condition met
178
+
179
+ # If function value tolerance is met, report successful convergence,
180
+ # regardless of other conditions. Note that `frtol` has been redefined
181
+ # as `frtol = frtol * minimum(abs(f1), abs(f2))`, where `f1` and `f2` are the
182
+ # function evaluated at the original ends of the bracket.
183
+ i = xp.abs(work.fmin) <= work.fatol + work.frtol
184
+ work.status[i] = eim._ECONVERGED
185
+ stop[i] = True
186
+
187
+ # If the bracket is no longer valid, report failure (unless a function
188
+ # tolerance is met, as detected above).
189
+ i = (xp_sign(work.f1) == xp_sign(work.f2)) & ~stop
190
+ NaN = xp.asarray(xp.nan, dtype=work.xmin.dtype)
191
+ work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._ESIGNERR
192
+ stop[i] = True
193
+
194
+ # If the abscissae are non-finite or either function value is NaN,
195
+ # report failure.
196
+ x_nonfinite = ~(xp.isfinite(work.x1) & xp.isfinite(work.x2))
197
+ f_nan = xp.isnan(work.f1) | xp.isnan(work.f2)
198
+ i = (x_nonfinite | f_nan) & ~stop
199
+ work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._EVALUEERR
200
+ stop[i] = True
201
+
202
+ # This is the convergence criterion used in bisect. Chandrupatla's
203
+ # criterion is equivalent to this except with a factor of 4 on `xrtol`.
204
+ work.dx = xp.abs(work.x2 - work.x1)
205
+ work.tol = xp.abs(work.xmin) * work.xrtol + work.xatol
206
+ i = work.dx < work.tol
207
+ work.status[i] = eim._ECONVERGED
208
+ stop[i] = True
209
+
210
+ return stop
211
+
212
+ def post_termination_check(work):
213
+ # [1] Figure 1 (third diamond and boxes / Equation 1)
214
+ xi1 = (work.x1 - work.x2) / (work.x3 - work.x2)
215
+ phi1 = (work.f1 - work.f2) / (work.f3 - work.f2)
216
+ alpha = (work.x3 - work.x1) / (work.x2 - work.x1)
217
+ j = ((1 - xp.sqrt(1 - xi1)) < phi1) & (phi1 < xp.sqrt(xi1))
218
+
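+ # Where `j` holds, inverse quadratic interpolation is considered safe;
+ # elsewhere `t` keeps its bisection default of 0.5 (set just below).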
219
+ f1j, f2j, f3j, alphaj = work.f1[j], work.f2[j], work.f3[j], alpha[j]
220
+ t = xp.full_like(alpha, 0.5)
221
+ t[j] = (f1j / (f1j - f2j) * f3j / (f3j - f2j)
222
+ - alphaj * f1j / (f3j - f1j) * f2j / (f2j - f3j))
223
+
224
+ # [1] Figure 1 (last box; see also BASIC in appendix with comment
225
+ # "Adjust T Away from the Interval Boundary")
226
+ tl = 0.5 * work.tol / work.dx
227
+ work.t = xp_clip(t, tl, 1 - tl)
228
+
229
+ def customize_result(res, shape):
230
+ xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
231
+ i = res['xl'] < res['xr']
232
+ res['xl'] = xp.where(i, xl, xr)
233
+ res['xr'] = xp.where(i, xr, xl)
234
+ res['fl'] = xp.where(i, fl, fr)
235
+ res['fr'] = xp.where(i, fr, fl)
236
+ return shape
237
+
238
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
239
+ pre_func_eval, post_func_eval, check_termination,
240
+ post_termination_check, customize_result, res_work_pairs,
241
+ xp=xp)
242
+
243
+
244
+ def _chandrupatla_iv(func, args, xatol, xrtol,
245
+ fatol, frtol, maxiter, callback):
246
+ # Input validation for `_chandrupatla`
247
+
248
+ if not callable(func):
249
+ raise ValueError('`func` must be callable.')
250
+
251
+ if not np.iterable(args):
252
+ args = (args,)
253
+
254
+ # tolerances are floats, not arrays; OK to use NumPy
255
+ tols = np.asarray([xatol if xatol is not None else 1,
256
+ xrtol if xrtol is not None else 1,
257
+ fatol if fatol is not None else 1,
258
+ frtol if frtol is not None else 1])
259
+ if (not np.issubdtype(tols.dtype, np.number) or np.any(tols < 0)
260
+ or np.any(np.isnan(tols)) or tols.shape != (4,)):
261
+ raise ValueError('Tolerances must be non-negative scalars.')
262
+
263
+ if maxiter is not None:
264
+ maxiter_int = int(maxiter)
265
+ if maxiter != maxiter_int or maxiter < 0:
266
+ raise ValueError('`maxiter` must be a non-negative integer.')
267
+
268
+ if callback is not None and not callable(callback):
269
+ raise ValueError('`callback` must be callable.')
270
+
271
+ return func, args, xatol, xrtol, fatol, frtol, maxiter, callback
272
+
273
+
274
+ def _chandrupatla_minimize(func, x1, x2, x3, *, args=(), xatol=None,
275
+ xrtol=None, fatol=None, frtol=None, maxiter=100,
276
+ callback=None):
277
+ """Find the minimizer of an elementwise function.
278
+
279
+ For each element of the output of `func`, `_chandrupatla_minimize` seeks
280
+ the scalar minimizer that minimizes the element. This function allows for
281
+ `x1`, `x2`, `x3`, and the elements of `args` to be arrays of any
282
+ broadcastable shapes.
283
+
284
+ Parameters
285
+ ----------
286
+ func : callable
287
+ The function whose minimizer is desired. The signature must be::
288
+
289
+ func(x: ndarray, *args) -> ndarray
290
+
291
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
292
+ which may contain an arbitrary number of arrays that are broadcastable
293
+ with `x`. ``func`` must be an elementwise function: each element
294
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
295
+ `_chandrupatla_minimize` seeks an array ``x`` such that ``func(x)`` is an array
296
+ of minima.
297
+ x1, x2, x3 : array_like
298
+ The abscissae of a standard scalar minimization bracket. A bracket is
299
+ valid if ``x1 < x2 < x3`` and ``func(x1) > func(x2) <= func(x3)``.
300
+ Must be broadcastable with one another and `args`.
301
+ args : tuple, optional
302
+ Additional positional arguments to be passed to `func`. Must be arrays
303
+ broadcastable with `x1`, `x2`, and `x3`. If the callable to be
304
+ minimized requires arguments that are not broadcastable with `x`,
305
+ wrap that callable with `func` such that `func` accepts only `x` and
306
+ broadcastable arrays.
307
+ xatol, xrtol, fatol, frtol : float, optional
308
+ Absolute and relative tolerances on the minimizer and function value.
309
+ See Notes for details.
310
+ maxiter : int, optional
311
+ The maximum number of iterations of the algorithm to perform.
312
+ callback : callable, optional
313
+ An optional user-supplied function to be called before the first
314
+ iteration and after each iteration.
315
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
316
+ similar to that returned by `_chandrupatla_minimize` (but containing
317
+ the current iterate's values of all variables). If `callback` raises a
318
+ ``StopIteration``, the algorithm will terminate immediately and
319
+ `_chandrupatla_minimize` will return a result.
320
+
321
+ Returns
322
+ -------
323
+ res : _RichResult
324
+ An instance of `scipy._lib._util._RichResult` with the following
325
+ attributes. (The descriptions are written as though the values will be
326
+ scalars; however, if `func` returns an array, the outputs will be
327
+ arrays of the same shape.)
328
+
329
+ success : bool
330
+ ``True`` when the algorithm terminated successfully (status ``0``).
331
+ status : int
332
+ An integer representing the exit status of the algorithm.
333
+ ``0`` : The algorithm converged to the specified tolerances.
334
+ ``-1`` : The algorithm encountered an invalid bracket.
335
+ ``-2`` : The maximum number of iterations was reached.
336
+ ``-3`` : A non-finite value was encountered.
337
+ ``-4`` : Iteration was terminated by `callback`.
338
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
339
+ x : float
340
+ The minimizer of the function, if the algorithm terminated
341
+ successfully.
342
+ fun : float
343
+ The value of `func` evaluated at `x`.
344
+ nfev : int
345
+ The number of points at which `func` was evaluated.
346
+ nit : int
347
+ The number of iterations of the algorithm that were performed.
348
+ xl, xm, xr : float
349
+ The final three-point bracket.
350
+ fl, fm, fr : float
351
+ The function value at the bracket points.
352
+
353
+ Notes
354
+ -----
355
+ Implemented based on Chandrupatla's original paper [1]_.
356
+
357
+ If ``x1 < x2 < x3`` are the points of the bracket and ``f1 > f2 <= f3``
358
+ are the values of ``func`` at those points, then the algorithm is
359
+ considered to have converged when ``x3 - x1 <= abs(x2)*xrtol + xatol``
360
+ or ``(f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol``. Note that first of
361
+ these differs from the termination conditions described in [1]_. The
362
+ default values of `xrtol` is the square root of the precision of the
363
+ appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal
364
+ number of the appropriate dtype.
365
+
366
+ References
367
+ ----------
368
+ .. [1] Chandrupatla, Tirupathi R. (1998).
369
+ "An efficient quadratic fit-sectioning algorithm for minimization
370
+ without derivatives".
371
+ Computer Methods in Applied Mechanics and Engineering, 152 (1-2),
372
+ 211-217. https://doi.org/10.1016/S0045-7825(97)00190-4
373
+
374
+ See Also
375
+ --------
376
+ golden, brent, bounded
377
+
378
+ Examples
379
+ --------
380
+ >>> from scipy.optimize._chandrupatla import _chandrupatla_minimize
381
+ >>> def f(x, args=1):
382
+ ... return (x - args)**2
383
+ >>> res = _chandrupatla_minimize(f, -5, 0, 5)
384
+ >>> res.x
385
+ 1.0
386
+ >>> c = [1, 1.5, 2]
387
+ >>> res = _chandrupatla_minimize(f, -5, 0, 5, args=(c,))
388
+ >>> res.x
389
+ array([1. , 1.5, 2. ])
390
+ """
391
+ res = _chandrupatla_iv(func, args, xatol, xrtol,
392
+ fatol, frtol, maxiter, callback)
393
+ func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res
394
+
395
+ # Initialization
396
+ xs = (x1, x2, x3)
397
+ temp = eim._initialize(func, xs, args)
398
+ func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
399
+ x1, x2, x3 = xs
400
+ f1, f2, f3 = fs
401
+ phi = dtype.type(0.5 + 0.5*5**0.5) # golden ratio
402
+ status = np.full_like(x1, eim._EINPROGRESS, dtype=int) # in progress
403
+ nit, nfev = 0, 3 # three function evaluations performed above
404
+ fatol = np.finfo(dtype).tiny if fatol is None else fatol
405
+ frtol = np.finfo(dtype).tiny if frtol is None else frtol
406
+ xatol = np.finfo(dtype).tiny if xatol is None else xatol
407
+ xrtol = np.sqrt(np.finfo(dtype).eps) if xrtol is None else xrtol
408
+
409
+ # Ensure that x1 < x2 < x3 initially.
410
+ xs, fs = np.vstack((x1, x2, x3)), np.vstack((f1, f2, f3))
411
+ i = np.argsort(xs, axis=0)
412
+ x1, x2, x3 = np.take_along_axis(xs, i, axis=0)
413
+ f1, f2, f3 = np.take_along_axis(fs, i, axis=0)
414
+ q0 = x3.copy() # "At the start, q0 is set at x3..." ([1] after (7))
415
+
416
+ work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=x3, f3=f3, phi=phi,
417
+ xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
418
+ nit=nit, nfev=nfev, status=status, q0=q0, args=args)
419
+ res_work_pairs = [('status', 'status'),
420
+ ('x', 'x2'), ('fun', 'f2'),
421
+ ('nit', 'nit'), ('nfev', 'nfev'),
422
+ ('xl', 'x1'), ('xm', 'x2'), ('xr', 'x3'),
423
+ ('fl', 'f1'), ('fm', 'f2'), ('fr', 'f3')]
424
+
425
+ def pre_func_eval(work):
426
+ # `_check_termination` is called first -> `x3 - x2 > x2 - x1`
427
+ # But let's calculate a few terms that we'll reuse
428
+ x21 = work.x2 - work.x1
429
+ x32 = work.x3 - work.x2
430
+
431
+ # [1] Section 3. "The quadratic minimum point Q1 is calculated using
432
+ # the relations developed in the previous section." [1] Section 2 (5/6)
433
+ A = x21 * (work.f3 - work.f2)
434
+ B = x32 * (work.f1 - work.f2)
435
+ C = A / (A + B)
436
+ # q1 = C * (work.x1 + work.x2) / 2 + (1 - C) * (work.x2 + work.x3) / 2
437
+ q1 = 0.5 * (C*(work.x1 - work.x3) + work.x2 + work.x3) # much faster
438
+ # this is an array, so multiplying by 0.5 does not change dtype
439
+
440
+ # "If Q1 and Q0 are sufficiently close... Q1 is accepted if it is
441
+ # sufficiently away from the inside point x2"
442
+ i = abs(q1 - work.q0) < 0.5 * abs(x21) # [1] (7)
443
+ xi = q1[i]
444
+ # Later, after (9), "If the point Q1 is in a +/- xtol neighborhood of
445
+ # x2, the new point is chosen in the larger interval at a distance
446
+ # tol away from x2."
447
+ # See also QBASIC code after "Accept Ql adjust if close to X2".
448
+ j = abs(q1[i] - work.x2[i]) <= work.xtol[i]
449
+ xi[j] = work.x2[i][j] + np.sign(x32[i][j]) * work.xtol[i][j]
450
+
451
+ # "If condition (7) is not satisfied, golden sectioning of the larger
452
+ # interval is carried out to introduce the new point."
453
+ # (For simplicity, we go ahead and calculate it for all points, but we
454
+ # change the elements for which the condition was satisfied.)
455
+ x = work.x2 + (2 - work.phi) * x32
456
+ x[i] = xi
457
+
458
+ # "We define Q0 as the value of Q1 at the previous iteration."
459
+ work.q0 = q1
460
+ return x
461
+
462
+ def post_func_eval(x, f, work):
463
+ # Standard logic for updating a three-point bracket based on a new
464
+ # point. In QBASIC code, see "IF SGN(X-X2) = SGN(X3-X2) THEN...".
465
+ # There is an awful lot of data copying going on here; this would
466
+ # probably benefit from code optimization or implementation in Pythran.
467
+ i = np.sign(x - work.x2) == np.sign(work.x3 - work.x2)
468
+ xi, x1i, x2i, x3i = x[i], work.x1[i], work.x2[i], work.x3[i],
469
+ fi, f1i, f2i, f3i = f[i], work.f1[i], work.f2[i], work.f3[i]
470
+ j = fi > f2i
471
+ x3i[j], f3i[j] = xi[j], fi[j]
472
+ j = ~j
473
+ x1i[j], f1i[j], x2i[j], f2i[j] = x2i[j], f2i[j], xi[j], fi[j]
474
+
475
+ ni = ~i
476
+ xni, x1ni, x2ni, x3ni = x[ni], work.x1[ni], work.x2[ni], work.x3[ni],
477
+ fni, f1ni, f2ni, f3ni = f[ni], work.f1[ni], work.f2[ni], work.f3[ni]
478
+ j = fni > f2ni
479
+ x1ni[j], f1ni[j] = xni[j], fni[j]
480
+ j = ~j
481
+ x3ni[j], f3ni[j], x2ni[j], f2ni[j] = x2ni[j], f2ni[j], xni[j], fni[j]
482
+
483
+ work.x1[i], work.x2[i], work.x3[i] = x1i, x2i, x3i
484
+ work.f1[i], work.f2[i], work.f3[i] = f1i, f2i, f3i
485
+ work.x1[ni], work.x2[ni], work.x3[ni] = x1ni, x2ni, x3ni,
486
+ work.f1[ni], work.f2[ni], work.f3[ni] = f1ni, f2ni, f3ni
487
+
488
+ def check_termination(work):
489
+ # Check for all terminal conditions and record statuses.
490
+ stop = np.zeros_like(work.x1, dtype=bool) # termination condition met
491
+
492
+ # Bracket is invalid; stop and don't return minimizer/minimum
493
+ i = ((work.f2 > work.f1) | (work.f2 > work.f3))
494
+ work.x2[i], work.f2[i] = np.nan, np.nan
495
+ stop[i], work.status[i] = True, eim._ESIGNERR
496
+
497
+ # Non-finite values; stop and don't return minimizer/minimum
498
+ finite = np.isfinite(work.x1+work.x2+work.x3+work.f1+work.f2+work.f3)
499
+ i = ~(finite | stop)
500
+ work.x2[i], work.f2[i] = np.nan, np.nan
501
+ stop[i], work.status[i] = True, eim._EVALUEERR
502
+
503
+ # [1] Section 3 "Points 1 and 3 are interchanged if necessary to make
504
+ # the (x2, x3) the larger interval."
505
+ # Note: I had used np.choose; this is much faster. This would be a good
506
+ # place to save e.g. `work.x3 - work.x2` for reuse, but I tried and
507
+ # didn't notice a speed boost, so let's keep it simple.
508
+ i = abs(work.x3 - work.x2) < abs(work.x2 - work.x1)
509
+ temp = work.x1[i]
510
+ work.x1[i] = work.x3[i]
511
+ work.x3[i] = temp
512
+ temp = work.f1[i]
513
+ work.f1[i] = work.f3[i]
514
+ work.f3[i] = temp
515
+
516
+ # [1] Section 3 (bottom of page 212)
517
+ # "We set a tolerance value xtol..."
518
+ work.xtol = abs(work.x2) * work.xrtol + work.xatol # [1] (8)
519
+ # "The convergence based on interval is achieved when..."
520
+ # Note: Equality allowed in case of `xtol=0`
521
+ i = abs(work.x3 - work.x2) <= 2 * work.xtol # [1] (9)
522
+
523
+ # "We define ftol using..."
524
+ ftol = abs(work.f2) * work.frtol + work.fatol # [1] (10)
525
+ # "The convergence based on function values is achieved when..."
526
+ # Note 1: modify in place to incorporate tolerance on function value.
527
+ # Note 2: factor of 2 is not in the text; see QBASIC start of DO loop
528
+ i |= (work.f1 - 2 * work.f2 + work.f3) <= 2*ftol # [1] (11)
529
+ i &= ~stop
530
+ stop[i], work.status[i] = True, eim._ECONVERGED
531
+
532
+ return stop
533
+
534
+ def post_termination_check(work):
535
+ pass
536
+
537
+ def customize_result(res, shape):
538
+ xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
539
+ i = res['xl'] < res['xr']
540
+ res['xl'] = np.choose(i, (xr, xl))
541
+ res['xr'] = np.choose(i, (xl, xr))
542
+ res['fl'] = np.choose(i, (fr, fl))
543
+ res['fr'] = np.choose(i, (fl, fr))
544
+ return shape
545
+
546
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
547
+ pre_func_eval, post_func_eval, check_termination,
548
+ post_termination_check, customize_result, res_work_pairs,
549
+ xp=xp)
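For intuition, the two stopping tests described in the Notes above can be evaluated by hand for a sample bracket. A minimal sketch, assuming plain NumPy; the tolerance defaults mirror the initialization code and are not part of any public API:

>>> import numpy as np
>>> x1, x2, x3 = 0.9, 1.0, 1.1           # bracket with x1 < x2 < x3
>>> f1, f2, f3 = 0.01, 0.0, 0.01         # f1 > f2 <= f3
>>> xrtol = np.sqrt(np.finfo(np.float64).eps)
>>> xatol = fatol = frtol = np.finfo(np.float64).tiny
>>> bool((x3 - x1) <= abs(x2)*xrtol + xatol)             # interval test
False
>>> bool((f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol)    # function-value test
False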
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py ADDED
@@ -0,0 +1,316 @@
1
+ """
2
+ Interface to Constrained Optimization By Linear Approximation
3
+
4
+ Functions
5
+ ---------
6
+ .. autosummary::
7
+ :toctree: generated/
8
+
9
+ fmin_cobyla
10
+
11
+ """
12
+
13
+ import functools
14
+ from threading import RLock
15
+
16
+ import numpy as np
17
+ from scipy.optimize import _cobyla as cobyla
18
+ from ._optimize import (OptimizeResult, _check_unknown_options,
19
+ _prepare_scalar_function)
20
+ try:
21
+ from itertools import izip
22
+ except ImportError:
23
+ izip = zip
24
+
25
+ __all__ = ['fmin_cobyla']
26
+
27
+ # Workaround as _cobyla.minimize is not threadsafe
28
+ # due to an unknown f2py bug and can segfault,
29
+ # see gh-9658.
30
+ _module_lock = RLock()
31
+ def synchronized(func):
32
+ @functools.wraps(func)
33
+ def wrapper(*args, **kwargs):
34
+ with _module_lock:
35
+ return func(*args, **kwargs)
36
+ return wrapper
37
+
38
+ @synchronized
39
+ def fmin_cobyla(func, x0, cons, args=(), consargs=None, rhobeg=1.0,
40
+ rhoend=1e-4, maxfun=1000, disp=None, catol=2e-4,
41
+ *, callback=None):
42
+ """
43
+ Minimize a function using the Constrained Optimization By Linear
44
+ Approximation (COBYLA) method. This method wraps a FORTRAN
45
+ implementation of the algorithm.
46
+
47
+ Parameters
48
+ ----------
49
+ func : callable
50
+ Function to minimize. In the form func(x, \\*args).
51
+ x0 : ndarray
52
+ Initial guess.
53
+ cons : sequence
54
+ Constraint functions; must all be ``>=0`` (a single function
55
+ if only 1 constraint). Each function takes the parameters `x`
56
+ as its first argument, and it can return either a single number or
57
+ an array or list of numbers.
58
+ args : tuple, optional
59
+ Extra arguments to pass to function.
60
+ consargs : tuple, optional
61
+ Extra arguments to pass to constraint functions (default of None means
62
+ use same extra arguments as those passed to func).
63
+ Use ``()`` for no extra arguments.
64
+ rhobeg : float, optional
65
+ Reasonable initial changes to the variables.
66
+ rhoend : float, optional
67
+ Final accuracy in the optimization (not precisely guaranteed). This
68
+ is a lower bound on the size of the trust region.
69
+ disp : {0, 1, 2, 3}, optional
70
+ Controls the frequency of output; 0 implies no output.
71
+ maxfun : int, optional
72
+ Maximum number of function evaluations.
73
+ catol : float, optional
74
+ Absolute tolerance for constraint violations.
75
+ callback : callable, optional
76
+ Called after each iteration, as ``callback(x)``, where ``x`` is the
77
+ current parameter vector.
78
+
79
+ Returns
80
+ -------
81
+ x : ndarray
82
+ The argument that minimises `f`.
83
+
84
+ See also
85
+ --------
86
+ minimize: Interface to minimization algorithms for multivariate
87
+ functions. See the 'COBYLA' `method` in particular.
88
+
89
+ Notes
90
+ -----
91
+ This algorithm is based on linear approximations to the objective
92
+ function and each constraint. We briefly describe the algorithm.
93
+
94
+ Suppose the function is being minimized over k variables. At the
95
+ jth iteration the algorithm has k+1 points v_1, ..., v_(k+1),
96
+ an approximate solution x_j, and a radius RHO_j. The algorithm maintains
97
+ affine (i.e., linear plus a constant) approximations to the objective
98
+ function and constraint functions, chosen so that their values
99
+ agree with those of the true functions at the k+1 points v_1,.., v_(k+1).
100
+ This gives a linear program to solve (where the linear approximations
101
+ of the constraint functions are constrained to be non-negative).
102
+
103
+ However, the linear approximations are likely only good
104
+ approximations near the current simplex, so the linear program is
105
+ given the further requirement that the solution, which
106
+ will become x_(j+1), must be within RHO_j from x_j. RHO_j only
107
+ decreases, never increases. The initial RHO_j is rhobeg and the
108
+ final RHO_j is rhoend. In this way COBYLA's iterations behave
109
+ like a trust region algorithm.
110
+
111
+ Additionally, the linear program may be inconsistent, or the
112
+ approximation may give poor improvement. For details about
113
+ how these issues are resolved, as well as how the points v_i are
114
+ updated, refer to the source code or the references below.
115
+
116
+
117
+ References
118
+ ----------
119
+ Powell M.J.D. (1994), "A direct search optimization method that models
120
+ the objective and constraint functions by linear interpolation.", in
121
+ Advances in Optimization and Numerical Analysis, eds. S. Gomez and
122
+ J-P Hennart, Kluwer Academic (Dordrecht), pp. 51-67
123
+
124
+ Powell M.J.D. (1998), "Direct search algorithms for optimization
125
+ calculations", Acta Numerica 7, 287-336
126
+
127
+ Powell M.J.D. (2007), "A view of algorithms for optimization without
128
+ derivatives", Cambridge University Technical Report DAMTP 2007/NA03
129
+
130
+
131
+ Examples
132
+ --------
133
+ Minimize the objective function f(x,y) = x*y subject
134
+ to the constraints x**2 + y**2 < 1 and y > 0::
135
+
136
+ >>> def objective(x):
137
+ ... return x[0]*x[1]
138
+ ...
139
+ >>> def constr1(x):
140
+ ... return 1 - (x[0]**2 + x[1]**2)
141
+ ...
142
+ >>> def constr2(x):
143
+ ... return x[1]
144
+ ...
145
+ >>> from scipy.optimize import fmin_cobyla
146
+ >>> fmin_cobyla(objective, [0.0, 0.1], [constr1, constr2], rhoend=1e-7)
147
+ array([-0.70710685, 0.70710671])
148
+
149
+ The exact solution is (-sqrt(2)/2, sqrt(2)/2).
150
+
151
+
152
+
153
+ """
154
+ err = "cons must be a sequence of callable functions or a single"\
155
+ " callable function."
156
+ try:
157
+ len(cons)
158
+ except TypeError as e:
159
+ if callable(cons):
160
+ cons = [cons]
161
+ else:
162
+ raise TypeError(err) from e
163
+ else:
164
+ for thisfunc in cons:
165
+ if not callable(thisfunc):
166
+ raise TypeError(err)
167
+
168
+ if consargs is None:
169
+ consargs = args
170
+
171
+ # build constraints
172
+ con = tuple({'type': 'ineq', 'fun': c, 'args': consargs} for c in cons)
173
+
174
+ # options
175
+ opts = {'rhobeg': rhobeg,
176
+ 'tol': rhoend,
177
+ 'disp': disp,
178
+ 'maxiter': maxfun,
179
+ 'catol': catol,
180
+ 'callback': callback}
181
+
182
+ sol = _minimize_cobyla(func, x0, args, constraints=con,
183
+ **opts)
184
+ if disp and not sol['success']:
185
+ print(f"COBYLA failed to find a solution: {sol.message}")
186
+ return sol['x']
187
+
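The same problem can also be run through the `minimize` interface listed under See Also. A sketch, assuming the same objective and constraints as the docstring example above, so the result should match:

>>> from scipy.optimize import minimize
>>> res = minimize(lambda x: x[0]*x[1], [0.0, 0.1], method='COBYLA',
...                constraints=[{'type': 'ineq',
...                              'fun': lambda x: 1 - (x[0]**2 + x[1]**2)},
...                             {'type': 'ineq', 'fun': lambda x: x[1]}],
...                tol=1e-7)
>>> res.x
array([-0.70710685,  0.70710671])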
188
+
189
+ @synchronized
190
+ def _minimize_cobyla(fun, x0, args=(), constraints=(),
191
+ rhobeg=1.0, tol=1e-4, maxiter=1000,
192
+ disp=False, catol=2e-4, callback=None, bounds=None,
193
+ **unknown_options):
194
+ """
195
+ Minimize a scalar function of one or more variables using the
196
+ Constrained Optimization BY Linear Approximation (COBYLA) algorithm.
197
+
198
+ Options
199
+ -------
200
+ rhobeg : float
201
+ Reasonable initial changes to the variables.
202
+ tol : float
203
+ Final accuracy in the optimization (not precisely guaranteed).
204
+ This is a lower bound on the size of the trust region.
205
+ disp : bool
206
+ Set to True to print convergence messages. If False,
207
+ `verbosity` is ignored and set to 0.
208
+ maxiter : int
209
+ Maximum number of function evaluations.
210
+ catol : float
211
+ Tolerance (absolute) for constraint violations.
212
+
213
+ """
214
+ _check_unknown_options(unknown_options)
215
+ maxfun = maxiter
216
+ rhoend = tol
217
+ iprint = int(bool(disp))
218
+
219
+ # check constraints
220
+ if isinstance(constraints, dict):
221
+ constraints = (constraints, )
222
+
223
+ if bounds:
224
+ i_lb = np.isfinite(bounds.lb)
225
+ if np.any(i_lb):
226
+ def lb_constraint(x, *args, **kwargs):
227
+ return x[i_lb] - bounds.lb[i_lb]
228
+
229
+ constraints.append({'type': 'ineq', 'fun': lb_constraint})
230
+
231
+ i_ub = np.isfinite(bounds.ub)
232
+ if np.any(i_ub):
233
+ def ub_constraint(x):
234
+ return bounds.ub[i_ub] - x[i_ub]
235
+
236
+ constraints.append({'type': 'ineq', 'fun': ub_constraint})
237
+
238
+ for ic, con in enumerate(constraints):
239
+ # check type
240
+ try:
241
+ ctype = con['type'].lower()
242
+ except KeyError as e:
243
+ raise KeyError('Constraint %d has no type defined.' % ic) from e
244
+ except TypeError as e:
245
+ raise TypeError('Constraints must be defined using a '
246
+ 'dictionary.') from e
247
+ except AttributeError as e:
248
+ raise TypeError("Constraint's type must be a string.") from e
249
+ else:
250
+ if ctype != 'ineq':
251
+ raise ValueError("Constraints of type '%s' not handled by "
252
+ "COBYLA." % con['type'])
253
+
254
+ # check function
255
+ if 'fun' not in con:
256
+ raise KeyError('Constraint %d has no function defined.' % ic)
257
+
258
+ # check extra arguments
259
+ if 'args' not in con:
260
+ con['args'] = ()
261
+
262
+ # m is the total number of constraint values
263
+ # it takes into account that some constraints may be vector-valued
264
+ cons_lengths = []
265
+ for c in constraints:
266
+ f = c['fun'](x0, *c['args'])
267
+ try:
268
+ cons_length = len(f)
269
+ except TypeError:
270
+ cons_length = 1
271
+ cons_lengths.append(cons_length)
272
+ m = sum(cons_lengths)
273
+
274
+ # create the ScalarFunction, cobyla doesn't require derivative function
275
+ def _jac(x, *args):
276
+ return None
277
+
278
+ sf = _prepare_scalar_function(fun, x0, args=args, jac=_jac)
279
+
280
+ def calcfc(x, con):
281
+ f = sf.fun(x)
282
+ i = 0
283
+ for size, c in izip(cons_lengths, constraints):
284
+ con[i: i + size] = c['fun'](x, *c['args'])
285
+ i += size
286
+ return f
287
+
288
+ def wrapped_callback(x):
289
+ if callback is not None:
290
+ callback(np.copy(x))
291
+
292
+ info = np.zeros(4, np.float64)
293
+ xopt, info = cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
294
+ rhoend=rhoend, iprint=iprint, maxfun=maxfun,
295
+ dinfo=info, callback=wrapped_callback)
296
+
297
+ if info[3] > catol:
298
+ # Check constraint violation
299
+ info[0] = 4
300
+
301
+ return OptimizeResult(x=xopt,
302
+ status=int(info[0]),
303
+ success=info[0] == 1,
304
+ message={1: 'Optimization terminated successfully.',
305
+ 2: 'Maximum number of function evaluations '
306
+ 'has been exceeded.',
307
+ 3: 'Rounding errors are becoming damaging '
308
+ 'in COBYLA subroutine.',
309
+ 4: 'Did not converge to a solution '
310
+ 'satisfying the constraints. See '
311
+ '`maxcv` for magnitude of violation.',
312
+ 5: 'NaN result encountered.'
313
+ }.get(info[0], 'Unknown exit status.'),
314
+ nfev=int(info[1]),
315
+ fun=info[2],
316
+ maxcv=info[3])
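The `bounds` handling above turns each finite bound into an extra inequality constraint. A sketch of the slack those constraints compute, assuming plain NumPy and the public `Bounds` class with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import Bounds
>>> b = Bounds(lb=[0.0, -np.inf], ub=[np.inf, 2.0])
>>> i_lb, i_ub = np.isfinite(b.lb), np.isfinite(b.ub)
>>> x = np.array([0.5, 1.0])
>>> x[i_lb] - b.lb[i_lb]     # lb_constraint: nonnegative when feasible
array([0.5])
>>> b.ub[i_ub] - x[i_ub]     # ub_constraint: nonnegative when feasible
array([1.])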
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py ADDED
@@ -0,0 +1,62 @@
1
+ import numpy as np
2
+
3
+ from ._optimize import _check_unknown_options
4
+
5
+
6
+ def _minimize_cobyqa(fun, x0, args=(), bounds=None, constraints=(),
7
+ callback=None, disp=False, maxfev=None, maxiter=None,
8
+ f_target=-np.inf, feasibility_tol=1e-8,
9
+ initial_tr_radius=1.0, final_tr_radius=1e-6, scale=False,
10
+ **unknown_options):
11
+ """
12
+ Minimize a scalar function of one or more variables using the
13
+ Constrained Optimization BY Quadratic Approximations (COBYQA) algorithm [1]_.
14
+
15
+ .. versionadded:: 1.14.0
16
+
17
+ Options
18
+ -------
19
+ disp : bool
20
+ Set to True to print information about the optimization procedure.
21
+ maxfev : int
22
+ Maximum number of function evaluations.
23
+ maxiter : int
24
+ Maximum number of iterations.
25
+ f_target : float
26
+ Target value for the objective function. The optimization procedure is
27
+ terminated when the objective function value of a feasible point (see
28
+ `feasibility_tol` below) is less than or equal to this target.
29
+ feasibility_tol : float
30
+ Absolute tolerance for the constraint violation.
31
+ initial_tr_radius : float
32
+ Initial trust-region radius. Typically, this value should be in the
33
+ order of one tenth of the greatest expected change to the variables.
34
+ final_tr_radius : float
35
+ Final trust-region radius. It should indicate the accuracy required in
36
+ the final values of the variables. If provided, this option overrides
37
+ the value of `tol` in the `minimize` function.
38
+ scale : bool
39
+ Set to True to scale the variables according to the bounds. If True and
40
+ if all the lower and upper bounds are finite, the variables are scaled
41
+ to be within the range :math:`[-1, 1]`. If any of the lower or upper
42
+ bounds is infinite, the variables are not scaled.
43
+
44
+ References
45
+ ----------
46
+ .. [1] COBYQA
47
+ https://www.cobyqa.com/stable/
48
+ """
49
+ from .._lib.cobyqa import minimize # import here to avoid circular imports
50
+
51
+ _check_unknown_options(unknown_options)
52
+ options = {
53
+ 'disp': bool(disp),
54
+ 'maxfev': int(maxfev) if maxfev is not None else 500 * len(x0),
55
+ 'maxiter': int(maxiter) if maxiter is not None else 1000 * len(x0),
56
+ 'target': float(f_target),
57
+ 'feasibility_tol': float(feasibility_tol),
58
+ 'radius_init': float(initial_tr_radius),
59
+ 'radius_final': float(final_tr_radius),
60
+ 'scale': bool(scale),
61
+ }
62
+ return minimize(fun, x0, args, bounds, constraints, callback, options)
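A minimal usage sketch for this wrapper, assuming it is dispatched via ``method='cobyqa'`` in `minimize` (available per the versionadded note; option names follow the docstring above):

>>> import numpy as np
>>> from scipy.optimize import minimize
>>> res = minimize(lambda x: (x[0] - 1)**2 + x[1]**2, x0=[0.0, 0.5],
...                method='cobyqa', options={'maxiter': 200})
>>> bool(np.linalg.norm(res.x - [1.0, 0.0]) < 1e-3)
True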
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py ADDED
@@ -0,0 +1,590 @@
1
+ """Constraints definition for minimize."""
2
+ import numpy as np
3
+ from ._hessian_update_strategy import BFGS
4
+ from ._differentiable_functions import (
5
+ VectorFunction, LinearVectorFunction, IdentityVectorFunction)
6
+ from ._optimize import OptimizeWarning
7
+ from warnings import warn, catch_warnings, simplefilter, filterwarnings
8
+ from scipy.sparse import issparse
9
+
10
+
11
+ def _arr_to_scalar(x):
12
+ # If x is a numpy array, return x.item(). This will
13
+ # fail if the array has more than one element.
14
+ return x.item() if isinstance(x, np.ndarray) else x
15
+
16
+
17
+ class NonlinearConstraint:
18
+ """Nonlinear constraint on the variables.
19
+
20
+ The constraint has the general inequality form::
21
+
22
+ lb <= fun(x) <= ub
23
+
24
+ Here the vector of independent variables x is passed as ndarray of shape
25
+ (n,) and ``fun`` returns a vector with m components.
26
+
27
+ It is possible to use equal bounds to represent an equality constraint or
28
+ infinite bounds to represent a one-sided constraint.
29
+
30
+ Parameters
31
+ ----------
32
+ fun : callable
33
+ The function defining the constraint.
34
+ The signature is ``fun(x) -> array_like, shape (m,)``.
35
+ lb, ub : array_like
36
+ Lower and upper bounds on the constraint. Each array must have the
37
+ shape (m,) or be a scalar, in the latter case a bound will be the same
38
+ for all components of the constraint. Use ``np.inf`` with an
39
+ appropriate sign to specify a one-sided constraint.
40
+ Set components of `lb` and `ub` equal to represent an equality
41
+ constraint. Note that you can mix constraints of different types:
42
+ interval, one-sided or equality, by setting different components of
43
+ `lb` and `ub` as necessary.
44
+ jac : {callable, '2-point', '3-point', 'cs'}, optional
45
+ Method of computing the Jacobian matrix (an m-by-n matrix,
46
+ where element (i, j) is the partial derivative of f[i] with
47
+ respect to x[j]). The keywords {'2-point', '3-point',
48
+ 'cs'} select a finite difference scheme for the numerical estimation.
49
+ A callable must have the following signature:
50
+ ``jac(x) -> {ndarray, sparse matrix}, shape (m, n)``.
51
+ Default is '2-point'.
52
+ hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy, None}, optional
53
+ Method for computing the Hessian matrix. The keywords
54
+ {'2-point', '3-point', 'cs'} select a finite difference scheme for
55
+ numerical estimation. Alternatively, objects implementing
56
+ `HessianUpdateStrategy` interface can be used to approximate the
57
+ Hessian. Currently available implementations are:
58
+
59
+ - `BFGS` (default option)
60
+ - `SR1`
61
+
62
+ A callable must return the Hessian matrix of ``dot(fun, v)`` and
63
+ must have the following signature:
64
+ ``hess(x, v) -> {LinearOperator, sparse matrix, array_like}, shape (n, n)``.
65
+ Here ``v`` is ndarray with shape (m,) containing Lagrange multipliers.
66
+ keep_feasible : array_like of bool, optional
67
+ Whether to keep the constraint components feasible throughout
68
+ iterations. A single value sets this property for all components.
69
+ Default is False. Has no effect for equality constraints.
70
+ finite_diff_rel_step: None or array_like, optional
71
+ Relative step size for the finite difference approximation. Default is
72
+ None, which will select a reasonable value automatically depending
73
+ on a finite difference scheme.
74
+ finite_diff_jac_sparsity: {None, array_like, sparse matrix}, optional
75
+ Defines the sparsity structure of the Jacobian matrix for finite
76
+ difference estimation, its shape must be (m, n). If the Jacobian has
77
+ only few non-zero elements in *each* row, providing the sparsity
78
+ structure will greatly speed up the computations. A zero entry means
79
+ that a corresponding element in the Jacobian is identically zero.
80
+ If provided, forces the use of 'lsmr' trust-region solver.
81
+ If None (default) then dense differencing will be used.
82
+
83
+ Notes
84
+ -----
85
+ Finite difference schemes {'2-point', '3-point', 'cs'} may be used for
86
+ approximating either the Jacobian or the Hessian. We, however, do not allow
87
+ its use for approximating both simultaneously. Hence whenever the Jacobian
88
+ is estimated via finite-differences, we require the Hessian to be estimated
89
+ using one of the quasi-Newton strategies.
90
+
91
+ The scheme 'cs' is potentially the most accurate, but requires the function
92
+ to correctly handle complex inputs and be analytically continuable to the
93
+ complex plane. The scheme '3-point' is more accurate than '2-point' but
94
+ requires twice as many operations.
95
+
96
+ Examples
97
+ --------
98
+ Constrain ``x[0] < sin(x[1]) + 1.9``
99
+
100
+ >>> from scipy.optimize import NonlinearConstraint
101
+ >>> import numpy as np
102
+ >>> con = lambda x: x[0] - np.sin(x[1])
103
+ >>> nlc = NonlinearConstraint(con, -np.inf, 1.9)
104
+
105
+ """
106
+ def __init__(self, fun, lb, ub, jac='2-point', hess=BFGS(),
107
+ keep_feasible=False, finite_diff_rel_step=None,
108
+ finite_diff_jac_sparsity=None):
109
+ self.fun = fun
110
+ self.lb = lb
111
+ self.ub = ub
112
+ self.finite_diff_rel_step = finite_diff_rel_step
113
+ self.finite_diff_jac_sparsity = finite_diff_jac_sparsity
114
+ self.jac = jac
115
+ self.hess = hess
116
+ self.keep_feasible = keep_feasible
117
+
118
+
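The 'cs' scheme mentioned in the Notes above is the complex-step derivative, Im f(x + ih)/h. A standalone sketch of its accuracy, assuming plain NumPy and independent of this class:

>>> import numpy as np
>>> h = 1e-200
>>> d = np.imag(np.sin(1.0 + 1j*h)) / h   # derivative of sin at 1.0
>>> bool(np.allclose(d, np.cos(1.0)))
True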
119
+ class LinearConstraint:
120
+ """Linear constraint on the variables.
121
+
122
+ The constraint has the general inequality form::
123
+
124
+ lb <= A.dot(x) <= ub
125
+
126
+ Here the vector of independent variables x is passed as ndarray of shape
127
+ (n,) and the matrix A has shape (m, n).
128
+
129
+ It is possible to use equal bounds to represent an equality constraint or
130
+ infinite bounds to represent a one-sided constraint.
131
+
132
+ Parameters
133
+ ----------
134
+ A : {array_like, sparse matrix}, shape (m, n)
135
+ Matrix defining the constraint.
136
+ lb, ub : dense array_like, optional
137
+ Lower and upper limits on the constraint. Each array must have the
138
+ shape (m,) or be a scalar, in the latter case a bound will be the same
139
+ for all components of the constraint. Use ``np.inf`` with an
140
+ appropriate sign to specify a one-sided constraint.
141
+ Set components of `lb` and `ub` equal to represent an equality
142
+ constraint. Note that you can mix constraints of different types:
143
+ interval, one-sided or equality, by setting different components of
144
+ `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
145
+ and ``ub = np.inf`` (no limits).
146
+ keep_feasible : dense array_like of bool, optional
147
+ Whether to keep the constraint components feasible throughout
148
+ iterations. A single value sets this property for all components.
149
+ Default is False. Has no effect for equality constraints.
150
+ """
151
+ def _input_validation(self):
152
+ if self.A.ndim != 2:
153
+ message = "`A` must have exactly two dimensions."
154
+ raise ValueError(message)
155
+
156
+ try:
157
+ shape = self.A.shape[0:1]
158
+ self.lb = np.broadcast_to(self.lb, shape)
159
+ self.ub = np.broadcast_to(self.ub, shape)
160
+ self.keep_feasible = np.broadcast_to(self.keep_feasible, shape)
161
+ except ValueError:
162
+ message = ("`lb`, `ub`, and `keep_feasible` must be broadcastable "
163
+ "to shape `A.shape[0:1]`")
164
+ raise ValueError(message)
165
+
166
+ def __init__(self, A, lb=-np.inf, ub=np.inf, keep_feasible=False):
167
+ if not issparse(A):
168
+ # In some cases, if the constraint is not valid, this emits a
169
+ # VisibleDeprecationWarning about ragged nested sequences
170
+ # before eventually causing an error. `scipy.optimize.milp` would
171
+ # prefer that this just error out immediately so it can handle it
172
+ # rather than concerning the user.
173
+ with catch_warnings():
174
+ simplefilter("error")
175
+ self.A = np.atleast_2d(A).astype(np.float64)
176
+ else:
177
+ self.A = A
178
+ if issparse(lb) or issparse(ub):
179
+ raise ValueError("Constraint limits must be dense arrays.")
180
+ self.lb = np.atleast_1d(lb).astype(np.float64)
181
+ self.ub = np.atleast_1d(ub).astype(np.float64)
182
+
183
+ if issparse(keep_feasible):
184
+ raise ValueError("`keep_feasible` must be a dense array.")
185
+ self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
186
+ self._input_validation()
187
+
188
+ def residual(self, x):
189
+ """
190
+ Calculate the residual between the constraint function and the limits
191
+
192
+ For a linear constraint of the form::
193
+
194
+ lb <= A@x <= ub
195
+
196
+ the lower and upper residuals between ``A@x`` and the limits are values
197
+ ``sl`` and ``sb`` such that::
198
+
199
+ lb + sl == A@x == ub - sb
200
+
201
+ When all elements of ``sl`` and ``sb`` are positive, all elements of
202
+ the constraint are satisfied; a negative element in ``sl`` or ``sb``
203
+ indicates that the corresponding element of the constraint is not
204
+ satisfied.
205
+
206
+ Parameters
207
+ ----------
208
+ x: array_like
209
+ Vector of independent variables
210
+
211
+ Returns
212
+ -------
213
+ sl, sb : array-like
214
+ The lower and upper residuals
215
+ """
216
+ return self.A@x - self.lb, self.ub - self.A@x
217
+
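A sketch of `residual` for a single linear constraint ``0 <= x0 + x1 <= 4``, with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import LinearConstraint
>>> lc = LinearConstraint(np.array([[1.0, 1.0]]), 0.0, 4.0)
>>> lc.residual(np.array([1.0, 2.0]))   # both slacks nonnegative: feasible
(array([3.]), array([1.]))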
218
+
219
+ class Bounds:
220
+ """Bounds constraint on the variables.
221
+
222
+ The constraint has the general inequality form::
223
+
224
+ lb <= x <= ub
225
+
226
+ It is possible to use equal bounds to represent an equality constraint or
227
+ infinite bounds to represent a one-sided constraint.
228
+
229
+ Parameters
230
+ ----------
231
+ lb, ub : dense array_like, optional
232
+ Lower and upper bounds on independent variables. `lb`, `ub`, and
233
+ `keep_feasible` must be the same shape or broadcastable.
234
+ Set components of `lb` and `ub` equal
235
+ to fix a variable. Use ``np.inf`` with an appropriate sign to disable
236
+ bounds on all or some variables. Note that you can mix constraints of
237
+ different types: interval, one-sided or equality, by setting different
238
+ components of `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
239
+ and ``ub = np.inf`` (no bounds).
240
+ keep_feasible : dense array_like of bool, optional
241
+ Whether to keep the constraint components feasible throughout
242
+ iterations. Must be broadcastable with `lb` and `ub`.
243
+ Default is False. Has no effect for equality constraints.
244
+ """
245
+ def _input_validation(self):
246
+ try:
247
+ res = np.broadcast_arrays(self.lb, self.ub, self.keep_feasible)
248
+ self.lb, self.ub, self.keep_feasible = res
249
+ except ValueError:
250
+ message = "`lb`, `ub`, and `keep_feasible` must be broadcastable."
251
+ raise ValueError(message)
252
+
253
+ def __init__(self, lb=-np.inf, ub=np.inf, keep_feasible=False):
254
+ if issparse(lb) or issparse(ub):
255
+ raise ValueError("Lower and upper bounds must be dense arrays.")
256
+ self.lb = np.atleast_1d(lb)
257
+ self.ub = np.atleast_1d(ub)
258
+
259
+ if issparse(keep_feasible):
260
+ raise ValueError("`keep_feasible` must be a dense array.")
261
+ self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
262
+ self._input_validation()
263
+
264
+ def __repr__(self):
265
+ start = f"{type(self).__name__}({self.lb!r}, {self.ub!r}"
266
+ if np.any(self.keep_feasible):
267
+ end = f", keep_feasible={self.keep_feasible!r})"
268
+ else:
269
+ end = ")"
270
+ return start + end
271
+
272
+ def residual(self, x):
273
+ """Calculate the residual (slack) between the input and the bounds
274
+
275
+ For a bound constraint of the form::
276
+
277
+ lb <= x <= ub
278
+
279
+ the lower and upper residuals between `x` and the bounds are values
280
+ ``sl`` and ``sb`` such that::
281
+
282
+ lb + sl == x == ub - sb
283
+
284
+ When all elements of ``sl`` and ``sb`` are positive, all elements of
285
+ ``x`` lie within the bounds; a negative element in ``sl`` or ``sb``
286
+ indicates that the corresponding element of ``x`` is out of bounds.
287
+
288
+ Parameters
289
+ ----------
290
+ x: array_like
291
+ Vector of independent variables
292
+
293
+ Returns
294
+ -------
295
+ sl, sb : array-like
296
+ The lower and upper residuals
297
+ """
298
+ return x - self.lb, self.ub - x
299
+
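The same slack convention applies to `Bounds.residual`; a sketch with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import Bounds
>>> b = Bounds([0.0, 0.0], [1.0, 1.0])
>>> b.residual(np.array([0.25, 0.75]))  # x within bounds: all slacks >= 0
(array([0.25, 0.75]), array([0.75, 0.25]))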
300
+
301
+ class PreparedConstraint:
302
+ """Constraint prepared from a user defined constraint.
303
+
304
+ On creation it will check whether a constraint definition is valid and
305
+ the initial point is feasible. If created successfully, it will contain
306
+ the attributes listed below.
307
+
308
+ Parameters
309
+ ----------
310
+ constraint : {NonlinearConstraint, LinearConstraint`, Bounds}
311
+ Constraint to check and prepare.
312
+ x0 : array_like
313
+ Initial vector of independent variables.
314
+ sparse_jacobian : bool or None, optional
315
+ If bool, then the Jacobian of the constraint will be converted
316
+ to the corresponding format if necessary. If None (default), such
317
+ conversion is not made.
318
+ finite_diff_bounds : 2-tuple, optional
319
+ Lower and upper bounds on the independent variables for the finite
320
+ difference approximation, if applicable. Defaults to no bounds.
321
+
322
+ Attributes
323
+ ----------
324
+ fun : {VectorFunction, LinearVectorFunction, IdentityVectorFunction}
325
+ Function defining the constraint wrapped by one of the convenience
326
+ classes.
327
+ bounds : 2-tuple
328
+ Contains lower and upper bounds for the constraints --- lb and ub.
329
+ These are converted to ndarray and have a size equal to the number of
330
+ the constraints.
331
+ keep_feasible : ndarray
332
+ Array indicating which components must be kept feasible with a size
333
+ equal to the number of the constraints.
334
+ """
335
+ def __init__(self, constraint, x0, sparse_jacobian=None,
336
+ finite_diff_bounds=(-np.inf, np.inf)):
337
+ if isinstance(constraint, NonlinearConstraint):
338
+ fun = VectorFunction(constraint.fun, x0,
339
+ constraint.jac, constraint.hess,
340
+ constraint.finite_diff_rel_step,
341
+ constraint.finite_diff_jac_sparsity,
342
+ finite_diff_bounds, sparse_jacobian)
343
+ elif isinstance(constraint, LinearConstraint):
344
+ fun = LinearVectorFunction(constraint.A, x0, sparse_jacobian)
345
+ elif isinstance(constraint, Bounds):
346
+ fun = IdentityVectorFunction(x0, sparse_jacobian)
347
+ else:
348
+ raise ValueError("`constraint` of an unknown type is passed.")
349
+
350
+ m = fun.m
351
+
352
+ lb = np.asarray(constraint.lb, dtype=float)
353
+ ub = np.asarray(constraint.ub, dtype=float)
354
+ keep_feasible = np.asarray(constraint.keep_feasible, dtype=bool)
355
+
356
+ lb = np.broadcast_to(lb, m)
357
+ ub = np.broadcast_to(ub, m)
358
+ keep_feasible = np.broadcast_to(keep_feasible, m)
359
+
360
+ if keep_feasible.shape != (m,):
361
+ raise ValueError("`keep_feasible` has a wrong shape.")
362
+
363
+ mask = keep_feasible & (lb != ub)
364
+ f0 = fun.f
365
+ if np.any(f0[mask] < lb[mask]) or np.any(f0[mask] > ub[mask]):
366
+ raise ValueError("`x0` is infeasible with respect to some "
367
+ "inequality constraint with `keep_feasible` "
368
+ "set to True.")
369
+
370
+ self.fun = fun
371
+ self.bounds = (lb, ub)
372
+ self.keep_feasible = keep_feasible
373
+
374
+ def violation(self, x):
375
+ """How much the constraint is exceeded by.
376
+
377
+ Parameters
378
+ ----------
379
+ x : array-like
380
+ Vector of independent variables
381
+
382
+ Returns
383
+ -------
384
+ excess : array-like
385
+ How much the constraint is exceeded by, for each of the
386
+ constraints specified by `PreparedConstraint.fun`.
387
+ """
388
+ with catch_warnings():
389
+ # Ignore the following warning, it's not important when
390
+ # figuring out total violation
391
+ # UserWarning: delta_grad == 0.0. Check if the approximated
392
+ # function is linear
393
+ filterwarnings("ignore", "delta_grad", UserWarning)
394
+ ev = self.fun.fun(np.asarray(x))
395
+
396
+ excess_lb = np.maximum(self.bounds[0] - ev, 0)
397
+ excess_ub = np.maximum(ev - self.bounds[1], 0)
398
+
399
+ return excess_lb + excess_ub
400
+
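A sketch of `violation`, assuming the private class is imported from this module (`scipy.optimize._constraints`) and using illustrative values:

>>> import numpy as np
>>> from scipy.optimize import LinearConstraint
>>> from scipy.optimize._constraints import PreparedConstraint
>>> lc = LinearConstraint(np.array([[1.0, 1.0]]), 0.0, 4.0)
>>> pc = PreparedConstraint(lc, np.array([1.0, 2.0]))
>>> pc.violation(np.array([5.0, 5.0]))  # A@x = 10 exceeds ub = 4 by 6
array([6.])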
401
+
402
+ def new_bounds_to_old(lb, ub, n):
403
+ """Convert the new bounds representation to the old one.
404
+
405
+ The new representation is a tuple (lb, ub) and the old one is a list
406
+ containing n tuples, the ith of which holds the lower and upper bound on
407
+ the ith variable.
408
+ If any of the entries in lb/ub are -np.inf/np.inf they are replaced by
409
+ None.
410
+ """
411
+ lb = np.broadcast_to(lb, n)
412
+ ub = np.broadcast_to(ub, n)
413
+
414
+ lb = [float(x) if x > -np.inf else None for x in lb]
415
+ ub = [float(x) if x < np.inf else None for x in ub]
416
+
417
+ return list(zip(lb, ub))
418
+
419
+
420
+ def old_bound_to_new(bounds):
421
+ """Convert the old bounds representation to the new one.
422
+
423
+ The new representation is a tuple (lb, ub) and the old one is a list
424
+ containing n tuples, the ith of which holds the lower and upper bound on
425
+ the ith variable.
426
+ If any of the entries in lb/ub are None they are replaced by
427
+ -np.inf/np.inf.
428
+ """
429
+ lb, ub = zip(*bounds)
430
+
431
+ # Convert occurrences of None to -inf or inf, and replace occurrences of
432
+ # any numpy array x with x.item(). Then wrap the results in numpy arrays.
433
+ lb = np.array([float(_arr_to_scalar(x)) if x is not None else -np.inf
434
+ for x in lb])
435
+ ub = np.array([float(_arr_to_scalar(x)) if x is not None else np.inf
436
+ for x in ub])
437
+
438
+ return lb, ub
439
+
440
+
441
+ def strict_bounds(lb, ub, keep_feasible, n_vars):
442
+ """Remove bounds which are not asked to be kept feasible."""
443
+ strict_lb = np.resize(lb, n_vars).astype(float)
444
+ strict_ub = np.resize(ub, n_vars).astype(float)
445
+ keep_feasible = np.resize(keep_feasible, n_vars)
446
+ strict_lb[~keep_feasible] = -np.inf
447
+ strict_ub[~keep_feasible] = np.inf
448
+ return strict_lb, strict_ub
449
+
450
+
451
+ def new_constraint_to_old(con, x0):
452
+ """
453
+ Converts new-style constraint objects to old-style constraint dictionaries.
454
+ """
455
+ if isinstance(con, NonlinearConstraint):
456
+ if (con.finite_diff_jac_sparsity is not None or
457
+ con.finite_diff_rel_step is not None or
458
+ not isinstance(con.hess, BFGS) or # misses user specified BFGS
459
+ con.keep_feasible):
460
+ warn("Constraint options `finite_diff_jac_sparsity`, "
461
+ "`finite_diff_rel_step`, `keep_feasible`, and `hess`"
462
+ "are ignored by this method.",
463
+ OptimizeWarning, stacklevel=3)
464
+
465
+ fun = con.fun
466
+ if callable(con.jac):
467
+ jac = con.jac
468
+ else:
469
+ jac = None
470
+
471
+ else: # LinearConstraint
472
+ if np.any(con.keep_feasible):
473
+ warn("Constraint option `keep_feasible` is ignored by this method.",
474
+ OptimizeWarning, stacklevel=3)
475
+
476
+ A = con.A
477
+ if issparse(A):
478
+ A = A.toarray()
479
+ def fun(x):
480
+ return np.dot(A, x)
481
+ def jac(x):
482
+ return A
483
+
484
+ # FIXME: when bugs in VectorFunction/LinearVectorFunction are worked out,
485
+ # use pcon.fun.fun and pcon.fun.jac. Until then, get fun/jac above.
486
+ pcon = PreparedConstraint(con, x0)
487
+ lb, ub = pcon.bounds
488
+
489
+ i_eq = lb == ub
490
+ i_bound_below = np.logical_xor(lb != -np.inf, i_eq)
491
+ i_bound_above = np.logical_xor(ub != np.inf, i_eq)
492
+ i_unbounded = np.logical_and(lb == -np.inf, ub == np.inf)
493
+
494
+ if np.any(i_unbounded):
495
+ warn("At least one constraint is unbounded above and below. Such "
496
+ "constraints are ignored.",
497
+ OptimizeWarning, stacklevel=3)
498
+
499
+ ceq = []
500
+ if np.any(i_eq):
501
+ def f_eq(x):
502
+ y = np.array(fun(x)).flatten()
503
+ return y[i_eq] - lb[i_eq]
504
+ ceq = [{"type": "eq", "fun": f_eq}]
505
+
506
+ if jac is not None:
507
+ def j_eq(x):
508
+ dy = jac(x)
509
+ if issparse(dy):
510
+ dy = dy.toarray()
511
+ dy = np.atleast_2d(dy)
512
+ return dy[i_eq, :]
513
+ ceq[0]["jac"] = j_eq
514
+
515
+ cineq = []
516
+ n_bound_below = np.sum(i_bound_below)
517
+ n_bound_above = np.sum(i_bound_above)
518
+ if n_bound_below + n_bound_above:
519
+ def f_ineq(x):
520
+ y = np.zeros(n_bound_below + n_bound_above)
521
+ y_all = np.array(fun(x)).flatten()
522
+ y[:n_bound_below] = y_all[i_bound_below] - lb[i_bound_below]
523
+ y[n_bound_below:] = -(y_all[i_bound_above] - ub[i_bound_above])
524
+ return y
525
+ cineq = [{"type": "ineq", "fun": f_ineq}]
526
+
527
+ if jac is not None:
528
+ def j_ineq(x):
529
+ dy = np.zeros((n_bound_below + n_bound_above, len(x0)))
530
+ dy_all = jac(x)
531
+ if issparse(dy_all):
532
+ dy_all = dy_all.toarray()
533
+ dy_all = np.atleast_2d(dy_all)
534
+ dy[:n_bound_below, :] = dy_all[i_bound_below]
535
+ dy[n_bound_below:, :] = -dy_all[i_bound_above]
536
+ return dy
537
+ cineq[0]["jac"] = j_ineq
538
+
539
+ old_constraints = ceq + cineq
540
+
541
+ if len(old_constraints) > 1:
542
+ warn("Equality and inequality constraints are specified in the same "
543
+ "element of the constraint list. For efficient use with this "
544
+ "method, equality and inequality constraints should be specified "
545
+ "in separate elements of the constraint list. ",
546
+ OptimizeWarning, stacklevel=3)
547
+ return old_constraints
548
+
549
+
550
+ def old_constraint_to_new(ic, con):
551
+ """
552
+ Converts old-style constraint dictionaries to new-style constraint objects.
553
+ """
554
+ # check type
555
+ try:
556
+ ctype = con['type'].lower()
557
+ except KeyError as e:
558
+ raise KeyError('Constraint %d has no type defined.' % ic) from e
559
+ except TypeError as e:
560
+ raise TypeError(
561
+ 'Constraints must be a sequence of dictionaries.'
562
+ ) from e
563
+ except AttributeError as e:
564
+ raise TypeError("Constraint's type must be a string.") from e
565
+ else:
566
+ if ctype not in ['eq', 'ineq']:
567
+ raise ValueError("Unknown constraint type '%s'." % con['type'])
568
+ if 'fun' not in con:
569
+ raise ValueError('Constraint %d has no function defined.' % ic)
570
+
571
+ lb = 0
572
+ if ctype == 'eq':
573
+ ub = 0
574
+ else:
575
+ ub = np.inf
576
+
577
+ jac = '2-point'
578
+ if 'args' in con:
579
+ args = con['args']
580
+ def fun(x):
581
+ return con["fun"](x, *args)
582
+ if 'jac' in con:
583
+ def jac(x):
584
+ return con["jac"](x, *args)
585
+ else:
586
+ fun = con['fun']
587
+ if 'jac' in con:
588
+ jac = con['jac']
589
+
590
+ return NonlinearConstraint(fun, lb, ub, jac)
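A round-trip sketch for the two bounds-representation helpers above, assuming a private-module import; exact array formatting may vary by NumPy version:

>>> import numpy as np
>>> from scipy.optimize._constraints import new_bounds_to_old, old_bound_to_new
>>> old = new_bounds_to_old(np.array([0.0, -np.inf]), np.array([np.inf, 1.0]), 2)
>>> old
[(0.0, None), (None, 1.0)]
>>> old_bound_to_new(old)
(array([  0., -inf]), array([inf,  1.]))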
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py ADDED
@@ -0,0 +1,728 @@
1
+ import numpy as np
2
+
3
+ """
4
+ # 2023 - ported from minpack2.dcsrch, dcstep (Fortran) to Python
5
+ c MINPACK-1 Project. June 1983.
6
+ c Argonne National Laboratory.
7
+ c Jorge J. More' and David J. Thuente.
8
+ c
9
+ c MINPACK-2 Project. November 1993.
10
+ c Argonne National Laboratory and University of Minnesota.
11
+ c Brett M. Averick, Richard G. Carter, and Jorge J. More'.
12
+ """
13
+
14
+ # NOTE this file was linted by black on first commit, and can be kept that way.
15
+
16
+
17
+ class DCSRCH:
18
+ """
19
+ Parameters
20
+ ----------
21
+ phi : callable phi(alpha)
22
+ Function at point `alpha`
23
+ derphi : callable phi'(alpha)
24
+ Objective function derivative. Returns a scalar.
25
+ ftol : float
26
+ A nonnegative tolerance for the sufficient decrease condition.
27
+ gtol : float
28
+ A nonnegative tolerance for the curvature condition.
29
+ xtol : float
30
+ A nonnegative relative tolerance for an acceptable step. The
31
+ subroutine exits with a warning if the relative difference between
32
+ sty and stx is less than xtol.
33
+ stpmin : float
34
+ A nonnegative lower bound for the step.
35
+ stpmax :
36
+ A nonnegative upper bound for the step.
37
+
38
+ Notes
39
+ -----
40
+
41
+ This subroutine finds a step that satisfies a sufficient
42
+ decrease condition and a curvature condition.
43
+
44
+ Each call of the subroutine updates an interval with
45
+ endpoints stx and sty. The interval is initially chosen
46
+ so that it contains a minimizer of the modified function
47
+
48
+ psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
49
+
50
+ If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
51
+ interval is chosen so that it contains a minimizer of f.
52
+
53
+ The algorithm is designed to find a step that satisfies
54
+ the sufficient decrease condition
55
+
56
+ f(stp) <= f(0) + ftol*stp*f'(0),
57
+
58
+ and the curvature condition
59
+
60
+ abs(f'(stp)) <= gtol*abs(f'(0)).
61
+
62
+ If ftol is less than gtol and if, for example, the function
63
+ is bounded below, then there is always a step which satisfies
64
+ both conditions.
65
+
66
+ If no step can be found that satisfies both conditions, then
67
+ the algorithm stops with a warning. In this case stp only
68
+ satisfies the sufficient decrease condition.
69
+
70
+ A typical invocation of dcsrch has the following outline:
71
+
72
+ Evaluate the function at stp = 0.0d0; store in f.
73
+ Evaluate the gradient at stp = 0.0d0; store in g.
74
+ Choose a starting step stp.
75
+
76
+ task = 'START'
77
+ 10 continue
78
+ call dcsrch(stp,f,g,ftol,gtol,xtol,task,stpmin,stpmax,
79
+ isave,dsave)
80
+ if (task .eq. 'FG') then
81
+ Evaluate the function and the gradient at stp
82
+ go to 10
83
+ end if
84
+
85
+ NOTE: The user must not alter work arrays between calls.
86
+
87
+ The subroutine statement is
88
+
89
+ subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
90
+ task,isave,dsave)
91
+ where
92
+
93
+ stp is a double precision variable.
94
+ On entry stp is the current estimate of a satisfactory
95
+ step. On initial entry, a positive initial estimate
96
+ must be provided.
97
+ On exit stp is the current estimate of a satisfactory step
98
+ if task = 'FG'. If task = 'CONV' then stp satisfies
99
+ the sufficient decrease and curvature condition.
100
+
101
+ f is a double precision variable.
102
+ On initial entry f is the value of the function at 0.
103
+ On subsequent entries f is the value of the
104
+ function at stp.
105
+ On exit f is the value of the function at stp.
106
+
107
+ g is a double precision variable.
108
+ On initial entry g is the derivative of the function at 0.
109
+ On subsequent entries g is the derivative of the
110
+ function at stp.
111
+ On exit g is the derivative of the function at stp.
112
+
113
+ ftol is a double precision variable.
114
+ On entry ftol specifies a nonnegative tolerance for the
115
+ sufficient decrease condition.
116
+ On exit ftol is unchanged.
117
+
118
+ gtol is a double precision variable.
119
+ On entry gtol specifies a nonnegative tolerance for the
120
+ curvature condition.
121
+ On exit gtol is unchanged.
122
+
123
+ xtol is a double precision variable.
124
+ On entry xtol specifies a nonnegative relative tolerance
125
+ for an acceptable step. The subroutine exits with a
126
+ warning if the relative difference between sty and stx
127
+ is less than xtol.
128
+
129
+ On exit xtol is unchanged.
130
+
131
+ task is a character variable of length at least 60.
132
+ On initial entry task must be set to 'START'.
133
+ On exit task indicates the required action:
134
+
135
+ If task(1:2) = 'FG' then evaluate the function and
136
+ derivative at stp and call dcsrch again.
137
+
138
+ If task(1:4) = 'CONV' then the search is successful.
139
+
140
+ If task(1:4) = 'WARN' then the subroutine is not able
141
+ to satisfy the convergence conditions. The exit value of
142
+ stp contains the best point found during the search.
143
+
144
+ If task(1:5) = 'ERROR' then there is an error in the
145
+ input arguments.
146
+
147
+ On exit with convergence, a warning or an error, the
148
+ variable task contains additional information.
149
+
150
+ stpmin is a double precision variable.
151
+ On entry stpmin is a nonnegative lower bound for the step.
152
+ On exit stpmin is unchanged.
153
+
154
+ stpmax is a double precision variable.
155
+ On entry stpmax is a nonnegative upper bound for the step.
156
+ On exit stpmax is unchanged.
157
+
158
+ isave is an integer work array of dimension 2.
159
+
160
+ dsave is a double precision work array of dimension 13.
161
+
162
+ Subprograms called
163
+
164
+ MINPACK-2 ... dcstep
165
+ MINPACK-1 Project. June 1983.
166
+ Argonne National Laboratory.
167
+ Jorge J. More' and David J. Thuente.
168
+
169
+ MINPACK-2 Project. November 1993.
170
+ Argonne National Laboratory and University of Minnesota.
171
+ Brett M. Averick, Richard G. Carter, and Jorge J. More'.
172
+ """
173
+
174
+ def __init__(self, phi, derphi, ftol, gtol, xtol, stpmin, stpmax):
175
+ self.stage = None
176
+ self.ginit = None
177
+ self.gtest = None
178
+ self.gx = None
179
+ self.gy = None
180
+ self.finit = None
181
+ self.fx = None
182
+ self.fy = None
183
+ self.stx = None
184
+ self.sty = None
185
+ self.stmin = None
186
+ self.stmax = None
187
+ self.width = None
188
+ self.width1 = None
189
+
190
+ # leave all assessment of tolerances/limits to the first call of
191
+ # this object
192
+ self.ftol = ftol
193
+ self.gtol = gtol
194
+ self.xtol = xtol
195
+ self.stpmin = stpmin
196
+ self.stpmax = stpmax
197
+
198
+ self.phi = phi
199
+ self.derphi = derphi
200
+
201
+ def __call__(self, alpha1, phi0=None, derphi0=None, maxiter=100):
202
+ """
203
+ Parameters
204
+ ----------
205
+ alpha1 : float
206
+ alpha1 is the current estimate of a satisfactory
207
+ step. A positive initial estimate must be provided.
208
+ phi0 : float
209
+ the value of `phi` at 0 (if known).
210
+ derphi0 : float
211
+ the derivative of `derphi` at 0 (if known).
212
+ maxiter : int
213
+
214
+ Returns
215
+ -------
216
+ alpha : float
217
+ Step size, or None if no suitable step was found.
218
+ phi : float
219
+ Value of `phi` at the new point `alpha`.
220
+ phi0 : float
221
+ Value of `phi` at `alpha=0`.
222
+ task : bytes
223
+ On exit task indicates status information.
224
+
225
+ If task[:4] == b'CONV' then the search is successful.
226
+
227
+ If task[:4] == b'WARN' then the subroutine is not able
228
+ to satisfy the convergence conditions. The exit value of
229
+ stp contains the best point found during the search.
230
+
231
+ If task[:5] == b'ERROR' then there is an error in the
232
+ input arguments.
233
+ """
234
+ if phi0 is None:
235
+ phi0 = self.phi(0.0)
236
+ if derphi0 is None:
237
+ derphi0 = self.derphi(0.0)
238
+
239
+ phi1 = phi0
240
+ derphi1 = derphi0
241
+
242
+ task = b"START"
243
+ for i in range(maxiter):
244
+ stp, phi1, derphi1, task = self._iterate(
245
+ alpha1, phi1, derphi1, task
246
+ )
247
+
248
+ if not np.isfinite(stp):
249
+ task = b"WARN"
250
+ stp = None
251
+ break
252
+
253
+ if task[:2] == b"FG":
254
+ alpha1 = stp
255
+ phi1 = self.phi(stp)
256
+ derphi1 = self.derphi(stp)
257
+ else:
258
+ break
259
+ else:
260
+ # maxiter reached, the line search did not converge
261
+ stp = None
262
+ task = b"WARNING: dcsrch did not converge within max iterations"
263
+
264
+ if task[:5] == b"ERROR" or task[:4] == b"WARN":
265
+ stp = None # failed
266
+
267
+ return stp, phi1, phi0, task
268
+
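A minimal end-to-end sketch of this class on phi(a) = (a - 1)**2, with illustrative parameter values; because gtol=0.9 makes the weak curvature condition hold already at the first trial step, the search stops there rather than at the exact minimizer:

>>> from scipy.optimize._dcsrch import DCSRCH
>>> phi = lambda a: (a - 1.0)**2
>>> derphi = lambda a: 2.0*(a - 1.0)
>>> search = DCSRCH(phi, derphi, ftol=1e-4, gtol=0.9, xtol=1e-14,
...                 stpmin=1e-12, stpmax=50.0)
>>> stp, phi1, phi0, task = search(alpha1=0.5)
>>> task[:4], stp
(b'CONV', 0.5)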
269
+ def _iterate(self, stp, f, g, task):
270
+ """
271
+ Parameters
272
+ ----------
273
+ stp : float
274
+ The current estimate of a satisfactory step. On initial entry, a
275
+ positive initial estimate must be provided.
276
+ f : float
277
+ On first call f is the value of the function at 0. On subsequent
278
+ entries f should be the value of the function at stp.
279
+ g : float
280
+ On initial entry g is the derivative of the function at 0. On
281
+ subsequent entries g is the derivative of the function at stp.
282
+ task : bytes
283
+ On initial entry task must be set to 'START'.
284
+
285
+ On exit with convergence, a warning or an error, the
+ variable task contains additional information.
+
+
+ Returns
+ -------
+ stp, f, g, task : tuple
+
+ stp : float
+ the current estimate of a satisfactory step if task = 'FG'. If
+ task = 'CONV' then stp satisfies the sufficient decrease and
+ curvature condition.
+ f : float
+ the value of the function at stp.
+ g : float
+ the derivative of the function at stp.
+ task : bytes
+ On exit task indicates the required action:
+
+ If task(1:2) == b'FG' then evaluate the function and
+ derivative at stp and call dcsrch again.
+
+ If task(1:4) == b'CONV' then the search is successful.
+
+ If task(1:4) == b'WARN' then the subroutine is not able
+ to satisfy the convergence conditions. The exit value of
+ stp contains the best point found during the search.
+
+ If task(1:5) == b'ERROR' then there is an error in the
+ input arguments.
+ """
+ p5 = 0.5
+ p66 = 0.66
+ xtrapl = 1.1
+ xtrapu = 4.0
+
+ if task[:5] == b"START":
+ if stp < self.stpmin:
+ task = b"ERROR: STP .LT. STPMIN"
+ if stp > self.stpmax:
+ task = b"ERROR: STP .GT. STPMAX"
+ if g >= 0:
+ task = b"ERROR: INITIAL G .GE. ZERO"
+ if self.ftol < 0:
+ task = b"ERROR: FTOL .LT. ZERO"
+ if self.gtol < 0:
+ task = b"ERROR: GTOL .LT. ZERO"
+ if self.xtol < 0:
+ task = b"ERROR: XTOL .LT. ZERO"
+ if self.stpmin < 0:
+ task = b"ERROR: STPMIN .LT. ZERO"
+ if self.stpmax < self.stpmin:
+ task = b"ERROR: STPMAX .LT. STPMIN"
+
+ if task[:5] == b"ERROR":
+ return stp, f, g, task
+
+ # Initialize local variables.
+
+ self.brackt = False
+ self.stage = 1
+ self.finit = f
+ self.ginit = g
+ self.gtest = self.ftol * self.ginit
+ self.width = self.stpmax - self.stpmin
+ self.width1 = self.width / p5
+
+ # The variables stx, fx, gx contain the values of the step,
+ # function, and derivative at the best step.
+ # The variables sty, fy, gy contain the values of the step,
+ # function, and derivative at sty.
+ # The variables stp, f, g contain the values of the step,
+ # function, and derivative at stp.
+
+ self.stx = 0.0
+ self.fx = self.finit
+ self.gx = self.ginit
+ self.sty = 0.0
+ self.fy = self.finit
+ self.gy = self.ginit
+ self.stmin = 0
+ self.stmax = stp + xtrapu * stp
+ task = b"FG"
+ return stp, f, g, task
+
+ # In the original Fortran this was a location to restore variables;
+ # we don't need to do that here because they're attributes.
+
+ # If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
+ # algorithm enters the second stage.
+ ftest = self.finit + stp * self.gtest
+
+ if self.stage == 1 and f <= ftest and g >= 0:
+ self.stage = 2
+
+ # test for warnings
+ if self.brackt and (stp <= self.stmin or stp >= self.stmax):
+ task = b"WARNING: ROUNDING ERRORS PREVENT PROGRESS"
+ if self.brackt and self.stmax - self.stmin <= self.xtol * self.stmax:
+ task = b"WARNING: XTOL TEST SATISFIED"
+ if stp == self.stpmax and f <= ftest and g <= self.gtest:
+ task = b"WARNING: STP = STPMAX"
+ if stp == self.stpmin and (f > ftest or g >= self.gtest):
+ task = b"WARNING: STP = STPMIN"
+
+ # test for convergence
+ if f <= ftest and abs(g) <= self.gtol * -self.ginit:
+ task = b"CONVERGENCE"
+
+ # test for termination
+ if task[:4] == b"WARN" or task[:4] == b"CONV":
+ return stp, f, g, task
+
+ # A modified function is used to predict the step during the
+ # first stage if a lower function value has been obtained but
+ # the decrease is not sufficient.
+ if self.stage == 1 and f <= self.fx and f > ftest:
+ # Define the modified function and derivative values.
+ fm = f - stp * self.gtest
+ fxm = self.fx - self.stx * self.gtest
+ fym = self.fy - self.sty * self.gtest
+ gm = g - self.gtest
+ gxm = self.gx - self.gtest
+ gym = self.gy - self.gtest
+
+ # Call dcstep to update stx, sty, and to compute the new step.
+ # dcstep can have several operations which can produce NaN
+ # e.g. inf/inf. Filter these out.
+ with np.errstate(invalid="ignore", over="ignore"):
+ tup = dcstep(
+ self.stx,
+ fxm,
+ gxm,
+ self.sty,
+ fym,
+ gym,
+ stp,
+ fm,
+ gm,
+ self.brackt,
+ self.stmin,
+ self.stmax,
+ )
+ self.stx, fxm, gxm, self.sty, fym, gym, stp, self.brackt = tup
+
+ # Reset the function and derivative values for f
+ self.fx = fxm + self.stx * self.gtest
+ self.fy = fym + self.sty * self.gtest
+ self.gx = gxm + self.gtest
+ self.gy = gym + self.gtest
+
+ else:
+ # Call dcstep to update stx, sty, and to compute the new step.
+ # dcstep can have several operations which can produce NaN
+ # e.g. inf/inf. Filter these out.
+
+ with np.errstate(invalid="ignore", over="ignore"):
+ tup = dcstep(
+ self.stx,
+ self.fx,
+ self.gx,
+ self.sty,
+ self.fy,
+ self.gy,
+ stp,
+ f,
+ g,
+ self.brackt,
+ self.stmin,
+ self.stmax,
+ )
+ (
+ self.stx,
+ self.fx,
+ self.gx,
+ self.sty,
+ self.fy,
+ self.gy,
+ stp,
+ self.brackt,
+ ) = tup
+
+ # Decide if a bisection step is needed
+ if self.brackt:
+ if abs(self.sty - self.stx) >= p66 * self.width1:
+ stp = self.stx + p5 * (self.sty - self.stx)
+ self.width1 = self.width
+ self.width = abs(self.sty - self.stx)
+
+ # Set the minimum and maximum steps allowed for stp.
+ if self.brackt:
+ self.stmin = min(self.stx, self.sty)
+ self.stmax = max(self.stx, self.sty)
+ else:
+ self.stmin = stp + xtrapl * (stp - self.stx)
+ self.stmax = stp + xtrapu * (stp - self.stx)
+
+ # Force the step to be within the bounds stpmax and stpmin.
+ stp = np.clip(stp, self.stpmin, self.stpmax)
+
+ # If further progress is not possible, let stp be the best
+ # point obtained during the search.
+ if (
+ self.brackt
+ and (stp <= self.stmin or stp >= self.stmax)
+ or (
+ self.brackt
+ and self.stmax - self.stmin <= self.xtol * self.stmax
+ )
+ ):
+ stp = self.stx
+
+ # Obtain another function and derivative
+ task = b"FG"
+ return stp, f, g, task
+
+
+ def dcstep(stx, fx, dx, sty, fy, dy, stp, fp, dp, brackt, stpmin, stpmax):
+ """
+ Subroutine dcstep
+
+ This subroutine computes a safeguarded step for a search
+ procedure and updates an interval that contains a step that
+ satisfies a sufficient decrease and a curvature condition.
+
+ The parameter stx contains the step with the least function
+ value. If brackt is set to .true. then a minimizer has
+ been bracketed in an interval with endpoints stx and sty.
+ The parameter stp contains the current step.
+ The subroutine assumes that if brackt is set to .true. then
+
+ min(stx,sty) < stp < max(stx,sty),
+
+ and that the derivative at stx is negative in the direction
+ of the step.
+
+ The subroutine statement is
+
+ subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
+ stpmin,stpmax)
+
+ where
+
+ stx is a double precision variable.
+ On entry stx is the best step obtained so far and is an
+ endpoint of the interval that contains the minimizer.
+ On exit stx is the updated best step.
+
+ fx is a double precision variable.
+ On entry fx is the function at stx.
+ On exit fx is the function at stx.
+
+ dx is a double precision variable.
+ On entry dx is the derivative of the function at
+ stx. The derivative must be negative in the direction of
+ the step, that is, dx and stp - stx must have opposite
+ signs.
+ On exit dx is the derivative of the function at stx.
+
+ sty is a double precision variable.
+ On entry sty is the second endpoint of the interval that
+ contains the minimizer.
+ On exit sty is the updated endpoint of the interval that
+ contains the minimizer.
+
+ fy is a double precision variable.
+ On entry fy is the function at sty.
+ On exit fy is the function at sty.
+
+ dy is a double precision variable.
+ On entry dy is the derivative of the function at sty.
+ On exit dy is the derivative of the function at the exit sty.
+
+ stp is a double precision variable.
+ On entry stp is the current step. If brackt is set to .true.
+ then on input stp must be between stx and sty.
+ On exit stp is a new trial step.
+
+ fp is a double precision variable.
+ On entry fp is the function at stp.
+ On exit fp is unchanged.
+
+ dp is a double precision variable.
+ On entry dp is the derivative of the function at stp.
+ On exit dp is unchanged.
+
+ brackt is a logical variable.
+ On entry brackt specifies if a minimizer has been bracketed.
+ Initially brackt must be set to .false.
+ On exit brackt specifies if a minimizer has been bracketed.
+ When a minimizer is bracketed brackt is set to .true.
+
+ stpmin is a double precision variable.
+ On entry stpmin is a lower bound for the step.
+ On exit stpmin is unchanged.
+
+ stpmax is a double precision variable.
+ On entry stpmax is an upper bound for the step.
+ On exit stpmax is unchanged.
+
+ MINPACK-1 Project. June 1983
+ Argonne National Laboratory.
+ Jorge J. More' and David J. Thuente.
+
+ MINPACK-2 Project. November 1993.
+ Argonne National Laboratory and University of Minnesota.
+ Brett M. Averick and Jorge J. More'.
+
+ """
+ sgn_dp = np.sign(dp)
+ sgn_dx = np.sign(dx)
+
+ # sgnd = dp * (dx / abs(dx))
+ sgnd = sgn_dp * sgn_dx
+
+ # First case: A higher function value. The minimum is bracketed.
+ # If the cubic step is closer to stx than the quadratic step, the
+ # cubic step is taken, otherwise the average of the cubic and
+ # quadratic steps is taken.
+ if fp > fx:
+ theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
+ if stp < stx:
+ gamma *= -1
+ p = (gamma - dx) + theta
+ q = ((gamma - dx) + gamma) + dp
+ r = p / q
+ stpc = stx + r * (stp - stx)
+ stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.0) * (stp - stx)
+ if abs(stpc - stx) <= abs(stpq - stx):
+ stpf = stpc
+ else:
+ stpf = stpc + (stpq - stpc) / 2.0
+ brackt = True
+ elif sgnd < 0.0:
+ # Second case: A lower function value and derivatives of opposite
+ # sign. The minimum is bracketed. If the cubic step is farther from
+ # stp than the secant step, the cubic step is taken, otherwise the
+ # secant step is taken.
+ theta = 3 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
+ if stp > stx:
+ gamma *= -1
+ p = (gamma - dp) + theta
+ q = ((gamma - dp) + gamma) + dx
+ r = p / q
+ stpc = stp + r * (stx - stp)
+ stpq = stp + (dp / (dp - dx)) * (stx - stp)
+ if abs(stpc - stp) > abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+ brackt = True
+ elif abs(dp) < abs(dx):
+ # Third case: A lower function value, derivatives of the same sign,
+ # and the magnitude of the derivative decreases.
+
+ # The cubic step is computed only if the cubic tends to infinity
+ # in the direction of the step or if the minimum of the cubic
+ # is beyond stp. Otherwise the cubic step is defined to be the
+ # secant step.
+ theta = 3 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+
+ # The case gamma = 0 only arises if the cubic does not tend
+ # to infinity in the direction of the step.
+ gamma = s * np.sqrt(max(0, (theta / s) ** 2 - (dx / s) * (dp / s)))
+ if stp > stx:
+ gamma = -gamma
+ p = (gamma - dp) + theta
+ q = (gamma + (dx - dp)) + gamma
+ r = p / q
+ if r < 0 and gamma != 0:
+ stpc = stp + r * (stx - stp)
+ elif stp > stx:
+ stpc = stpmax
+ else:
+ stpc = stpmin
+ stpq = stp + (dp / (dp - dx)) * (stx - stp)
+
+ if brackt:
+ # A minimizer has been bracketed. If the cubic step is
+ # closer to stp than the secant step, the cubic step is
+ # taken, otherwise the secant step is taken.
+ if abs(stpc - stp) < abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+
+ if stp > stx:
+ stpf = min(stp + 0.66 * (sty - stp), stpf)
+ else:
+ stpf = max(stp + 0.66 * (sty - stp), stpf)
+ else:
+ # A minimizer has not been bracketed. If the cubic step is
+ # farther from stp than the secant step, the cubic step is
+ # taken, otherwise the secant step is taken.
+ if abs(stpc - stp) > abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+ stpf = np.clip(stpf, stpmin, stpmax)
+
+ else:
+ # Fourth case: A lower function value, derivatives of the same sign,
+ # and the magnitude of the derivative does not decrease. If the
+ # minimum is not bracketed, the step is either stpmin or stpmax,
+ # otherwise the cubic step is taken.
+ if brackt:
+ theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp
+ s = max(abs(theta), abs(dy), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dy / s) * (dp / s))
+ if stp > sty:
+ gamma = -gamma
+ p = (gamma - dp) + theta
+ q = ((gamma - dp) + gamma) + dy
+ r = p / q
+ stpc = stp + r * (sty - stp)
+ stpf = stpc
+ elif stp > stx:
+ stpf = stpmax
+ else:
+ stpf = stpmin
+
+ # Update the interval which contains a minimizer.
+ if fp > fx:
+ sty = stp
+ fy = fp
+ dy = dp
+ else:
+ if sgnd < 0:
+ sty = stx
+ fy = fx
+ dy = dx
+ stx = stp
+ fx = fp
+ dx = dp
+
+ # Compute the new step.
+ stp = stpf
+
+ return stx, fx, dx, sty, fy, dy, stp, brackt
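The `task`-based contract documented above is a reverse-communication interface: the line search never calls the objective itself; the caller evaluates it whenever `task` starts with b"FG". A minimal driver loop is sketched below. The names `search`, `phi`, and `dphi` are assumptions for illustration: `search` stands for any callable with the documented `(stp, f, g, task)` contract, and `phi`/`dphi` are the caller's function value and derivative along the search direction.

# Sketch of driving a dcsrch-style search via its task protocol
# (hypothetical driver; `search`, `phi`, `dphi` are not part of this diff).
def drive_line_search(search, phi, dphi, stp0, maxiter=100):
    # On the first call, f and g must be the function value and derivative
    # at step 0, and stp is the initial trial step (see docstring above).
    stp, task = stp0, b"START"
    f, g = phi(0.0), dphi(0.0)
    for _ in range(maxiter):
        stp, f, g, task = search(stp, f, g, task)
        if task[:2] == b"FG":
            # The search requests the objective at the new trial step.
            f, g = phi(stp), dphi(stp)
        else:
            break
    # task now begins with b"CONV", b"WARN", or b"ERROR".
    return stp, f, g, task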
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py ADDED
@@ -0,0 +1,693 @@
+ import numpy as np
+ import scipy.sparse as sps
+ from ._numdiff import approx_derivative, group_columns
+ from ._hessian_update_strategy import HessianUpdateStrategy
+ from scipy.sparse.linalg import LinearOperator
+ from scipy._lib._array_api import atleast_nd, array_namespace
+
+
+ FD_METHODS = ('2-point', '3-point', 'cs')
+
+
+ def _wrapper_fun(fun, args=()):
+ ncalls = [0]
+
+ def wrapped(x):
+ ncalls[0] += 1
+ # Send a copy because the user may overwrite it.
+ # Overwriting results in undefined behaviour because
+ # fun(self.x) will change self.x, with the two no longer linked.
+ fx = fun(np.copy(x), *args)
+ # Make sure the function returns a true scalar
+ if not np.isscalar(fx):
+ try:
+ fx = np.asarray(fx).item()
+ except (TypeError, ValueError) as e:
+ raise ValueError(
+ "The user-provided objective function "
+ "must return a scalar value."
+ ) from e
+ return fx
+ return wrapped, ncalls
+
+
+ def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None):
+ ncalls = [0]
+
+ if callable(grad):
+ def wrapped(x, **kwds):
+ # kwds present to give function same signature as numdiff variant
+ ncalls[0] += 1
+ return np.atleast_1d(grad(np.copy(x), *args))
+ return wrapped, ncalls
+
+ elif grad in FD_METHODS:
+ def wrapped1(x, f0=None):
+ ncalls[0] += 1
+ return approx_derivative(
+ fun, x, f0=f0, **finite_diff_options
+ )
+
+ return wrapped1, ncalls
+
+
+ def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None):
+ if callable(hess):
+ H = hess(np.copy(x0), *args)
+ ncalls = [1]
+
+ if sps.issparse(H):
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return sps.csr_matrix(hess(np.copy(x), *args))
+
+ H = sps.csr_matrix(H)
+
+ elif isinstance(H, LinearOperator):
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return hess(np.copy(x), *args)
+
+ else: # dense
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
+
+ H = np.atleast_2d(np.asarray(H))
+
+ return wrapped, ncalls, H
+ elif hess in FD_METHODS:
+ ncalls = [0]
+
+ def wrapped1(x, f0=None):
+ return approx_derivative(
+ grad, x, f0=f0, **finite_diff_options
+ )
+
+ return wrapped1, ncalls, None
+
+
+ class ScalarFunction:
+ """Scalar function and its derivatives.
+
+ This class defines a scalar function F: R^n->R and methods for
+ computing or approximating its first and second derivatives.
+
+ Parameters
+ ----------
+ fun : callable
+ evaluates the scalar function. Must be of the form ``fun(x, *args)``,
+ where ``x`` is the argument in the form of a 1-D array and ``args`` is
+ a tuple of any additional fixed parameters needed to completely specify
+ the function. Should return a scalar.
+ x0 : array-like
+ Provides an initial set of variables for evaluating fun. Array of real
+ elements of size (n,), where 'n' is the number of independent
+ variables.
+ args : tuple, optional
+ Any additional fixed parameters needed to completely specify the scalar
+ function.
+ grad : {callable, '2-point', '3-point', 'cs'}
+ Method for computing the gradient vector.
+ If it is a callable, it should be a function that returns the gradient
+ vector:
+
+ ``grad(x, *args) -> array_like, shape (n,)``
+
+ where ``x`` is an array with shape (n,) and ``args`` is a tuple with
+ the fixed parameters.
+ Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
+ to select a finite difference scheme for numerical estimation of the
+ gradient with a relative step size. These finite difference schemes
+ obey any specified `bounds`.
+ hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
+ Method for computing the Hessian matrix. If it is callable, it should
+ return the Hessian matrix:
+
+ ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
+
+ where x is a (n,) ndarray and `args` is a tuple with the fixed
+ parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
+ select a finite difference scheme for numerical estimation. Or, objects
+ implementing the `HessianUpdateStrategy` interface can be used to
+ approximate the Hessian.
+ Whenever the gradient is estimated via finite-differences, the Hessian
+ cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
+ to be estimated using one of the quasi-Newton strategies.
+ finite_diff_rel_step : None or array_like
+ Relative step size to use. The absolute step size is computed as
+ ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
+ adjusted to fit into the bounds. For ``method='3-point'`` the sign
+ of `h` is ignored. If None then finite_diff_rel_step is selected
+ automatically.
+ finite_diff_bounds : tuple of array_like
+ Lower and upper bounds on independent variables. Defaults to no bounds,
+ (-np.inf, np.inf). Each bound must match the size of `x0` or be a
+ scalar, in the latter case the bound will be the same for all
+ variables. Use it to limit the range of function evaluation.
+ epsilon : None or array_like, optional
+ Absolute step size to use, possibly adjusted to fit into the bounds.
+ For ``method='3-point'`` the sign of `epsilon` is ignored. By default
+ relative steps are used, only if ``epsilon is not None`` are absolute
+ steps used.
+
+ Notes
+ -----
+ This class implements a memoization logic. There are methods `fun`,
+ `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The following
+ things should be considered:
+
+ 1. Use only public methods `fun`, `grad` and `hess`.
+ 2. After one of the methods is called, the corresponding attribute
+ will be set. However, a subsequent call with a different argument
+ of *any* of the methods may overwrite the attribute.
+ """
+ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
+ finite_diff_bounds, epsilon=None):
+ if not callable(grad) and grad not in FD_METHODS:
+ raise ValueError(
+ f"`grad` must be either callable or one of {FD_METHODS}."
+ )
+
+ if not (callable(hess) or hess in FD_METHODS
+ or isinstance(hess, HessianUpdateStrategy)):
+ raise ValueError(
+ f"`hess` must be either callable, HessianUpdateStrategy"
+ f" or one of {FD_METHODS}."
+ )
+
+ if grad in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the gradient is estimated via "
+ "finite-differences, we require the Hessian "
+ "to be estimated using one of the "
+ "quasi-Newton strategies.")
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # original arguments
+ self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args)
+ self._orig_fun = fun
+ self._orig_grad = grad
+ self._orig_hess = hess
+ self._args = args
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+ self.n = self.x.size
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+
+ self._lowest_x = None
+ self._lowest_f = np.inf
+
+ finite_diff_options = {}
+ if grad in FD_METHODS:
+ finite_diff_options["method"] = grad
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["abs_step"] = epsilon
+ finite_diff_options["bounds"] = finite_diff_bounds
+ if hess in FD_METHODS:
+ finite_diff_options["method"] = hess
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["abs_step"] = epsilon
+ finite_diff_options["as_linear_operator"] = True
+
+ # Initial function evaluation
+ self._update_fun()
+
+ # Initial gradient evaluation
+ self._wrapped_grad, self._ngev = _wrapper_grad(
+ grad,
+ fun=self._wrapped_fun,
+ args=args,
+ finite_diff_options=finite_diff_options
+ )
+ self._update_grad()
+
+ # Hessian evaluation
+ if callable(hess):
+ self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
+ hess, x0=x0, args=args
+ )
+ self.H_updated = True
+ elif hess in FD_METHODS:
+ self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
+ hess,
+ grad=self._wrapped_grad,
+ x0=x0,
+ finite_diff_options=finite_diff_options
+ )
+ self._update_grad()
+ self.H = self._wrapped_hess(self.x, f0=self.g)
+ self.H_updated = True
+ elif isinstance(hess, HessianUpdateStrategy):
+ self.H = hess
+ self.H.initialize(self.n, 'hess')
+ self.H_updated = True
+ self.x_prev = None
+ self.g_prev = None
+ self._nhev = [0]
+
+ @property
+ def nfev(self):
+ return self._nfev[0]
+
+ @property
+ def ngev(self):
+ return self._ngev[0]
+
+ @property
+ def nhev(self):
+ return self._nhev[0]
+
+ def _update_x(self, x):
+ if isinstance(self._orig_hess, HessianUpdateStrategy):
+ self._update_grad()
+ self.x_prev = self.x
+ self.g_prev = self.g
+ # ensure that self.x is a copy of x. Don't store a reference
+ # otherwise the memoization doesn't work properly.
+
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+ self._update_hess()
+ else:
+ # ensure that self.x is a copy of x. Don't store a reference
+ # otherwise the memoization doesn't work properly.
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+
+ def _update_fun(self):
+ if not self.f_updated:
+ fx = self._wrapped_fun(self.x)
+ if fx < self._lowest_f:
+ self._lowest_x = self.x
+ self._lowest_f = fx
+
+ self.f = fx
+ self.f_updated = True
+
+ def _update_grad(self):
+ if not self.g_updated:
+ if self._orig_grad in FD_METHODS:
+ self._update_fun()
+ self.g = self._wrapped_grad(self.x, f0=self.f)
+ self.g_updated = True
+
+ def _update_hess(self):
+ if not self.H_updated:
+ if self._orig_hess in FD_METHODS:
+ self._update_grad()
+ self.H = self._wrapped_hess(self.x, f0=self.g)
+ elif isinstance(self._orig_hess, HessianUpdateStrategy):
+ self._update_grad()
+ self.H.update(self.x - self.x_prev, self.g - self.g_prev)
+ else: # should be callable(hess)
+ self.H = self._wrapped_hess(self.x)
+
+ self.H_updated = True
+
+ def fun(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_fun()
+ return self.f
+
+ def grad(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_grad()
+ return self.g
+
+ def hess(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_hess()
+ return self.H
+
+ def fun_and_grad(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_fun()
+ self._update_grad()
+ return self.f, self.g
+
+
+ class VectorFunction:
+ """Vector function and its derivatives.
+
+ This class defines a vector function F: R^n->R^m and methods for
+ computing or approximating its first and second derivatives.
+
+ Notes
+ -----
+ This class implements a memoization logic. There are methods `fun`,
+ `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The following
+ things should be considered:
+
+ 1. Use only public methods `fun`, `jac` and `hess`.
+ 2. After one of the methods is called, the corresponding attribute
+ will be set. However, a subsequent call with a different argument
+ of *any* of the methods may overwrite the attribute.
+ """
+ def __init__(self, fun, x0, jac, hess,
+ finite_diff_rel_step, finite_diff_jac_sparsity,
+ finite_diff_bounds, sparse_jacobian):
+ if not callable(jac) and jac not in FD_METHODS:
+ raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.")
+
+ if not (callable(hess) or hess in FD_METHODS
+ or isinstance(hess, HessianUpdateStrategy)):
+ raise ValueError("`hess` must be either callable, "
+ f"HessianUpdateStrategy or one of {FD_METHODS}.")
+
+ if jac in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the Jacobian is estimated via "
+ "finite-differences, we require the Hessian to "
+ "be estimated using one of the quasi-Newton "
+ "strategies.")
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+
+ self.n = self.x.size
+ self.nfev = 0
+ self.njev = 0
+ self.nhev = 0
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+
+ finite_diff_options = {}
+ if jac in FD_METHODS:
+ finite_diff_options["method"] = jac
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ if finite_diff_jac_sparsity is not None:
+ sparsity_groups = group_columns(finite_diff_jac_sparsity)
+ finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
+ sparsity_groups)
+ finite_diff_options["bounds"] = finite_diff_bounds
+ self.x_diff = np.copy(self.x)
+ if hess in FD_METHODS:
+ finite_diff_options["method"] = hess
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["as_linear_operator"] = True
+ self.x_diff = np.copy(self.x)
+ if jac in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the Jacobian is estimated via "
+ "finite-differences, we require the Hessian to "
+ "be estimated using one of the quasi-Newton "
+ "strategies.")
+
+ # Function evaluation
+ def fun_wrapped(x):
+ self.nfev += 1
+ return np.atleast_1d(fun(x))
+
+ def update_fun():
+ self.f = fun_wrapped(self.x)
+
+ self._update_fun_impl = update_fun
+ update_fun()
+
+ self.v = np.zeros_like(self.f)
+ self.m = self.v.size
+
+ # Jacobian Evaluation
+ if callable(jac):
+ self.J = jac(self.x)
+ self.J_updated = True
+ self.njev += 1
+
+ if (sparse_jacobian or
+ sparse_jacobian is None and sps.issparse(self.J)):
+ def jac_wrapped(x):
+ self.njev += 1
+ return sps.csr_matrix(jac(x))
+ self.J = sps.csr_matrix(self.J)
+ self.sparse_jacobian = True
+
+ elif sps.issparse(self.J):
+ def jac_wrapped(x):
+ self.njev += 1
+ return jac(x).toarray()
+ self.J = self.J.toarray()
+ self.sparse_jacobian = False
+
+ else:
+ def jac_wrapped(x):
+ self.njev += 1
+ return np.atleast_2d(jac(x))
+ self.J = np.atleast_2d(self.J)
+ self.sparse_jacobian = False
+
+ def update_jac():
+ self.J = jac_wrapped(self.x)
+
+ elif jac in FD_METHODS:
+ self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options)
+ self.J_updated = True
+
+ if (sparse_jacobian or
+ sparse_jacobian is None and sps.issparse(self.J)):
+ def update_jac():
+ self._update_fun()
+ self.J = sps.csr_matrix(
+ approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options))
+ self.J = sps.csr_matrix(self.J)
+ self.sparse_jacobian = True
+
+ elif sps.issparse(self.J):
+ def update_jac():
+ self._update_fun()
+ self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options).toarray()
+ self.J = self.J.toarray()
+ self.sparse_jacobian = False
+
+ else:
+ def update_jac():
+ self._update_fun()
+ self.J = np.atleast_2d(
+ approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options))
+ self.J = np.atleast_2d(self.J)
+ self.sparse_jacobian = False
+
+ self._update_jac_impl = update_jac
+
+ # Define Hessian
+ if callable(hess):
+ self.H = hess(self.x, self.v)
+ self.H_updated = True
+ self.nhev += 1
+
+ if sps.issparse(self.H):
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return sps.csr_matrix(hess(x, v))
+ self.H = sps.csr_matrix(self.H)
+
+ elif isinstance(self.H, LinearOperator):
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return hess(x, v)
+
+ else:
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return np.atleast_2d(np.asarray(hess(x, v)))
+ self.H = np.atleast_2d(np.asarray(self.H))
+
+ def update_hess():
+ self.H = hess_wrapped(self.x, self.v)
+ elif hess in FD_METHODS:
+ def jac_dot_v(x, v):
+ return jac_wrapped(x).T.dot(v)
+
+ def update_hess():
+ self._update_jac()
+ self.H = approx_derivative(jac_dot_v, self.x,
+ f0=self.J.T.dot(self.v),
+ args=(self.v,),
+ **finite_diff_options)
+ update_hess()
+ self.H_updated = True
+ elif isinstance(hess, HessianUpdateStrategy):
+ self.H = hess
+ self.H.initialize(self.n, 'hess')
+ self.H_updated = True
+ self.x_prev = None
+ self.J_prev = None
+
+ def update_hess():
+ self._update_jac()
+ # When v is updated before x was updated, then x_prev and
+ # J_prev are None and we need this check.
+ if self.x_prev is not None and self.J_prev is not None:
+ delta_x = self.x - self.x_prev
+ delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
+ self.H.update(delta_x, delta_g)
+
+ self._update_hess_impl = update_hess
+
+ if isinstance(hess, HessianUpdateStrategy):
+ def update_x(x):
+ self._update_jac()
+ self.x_prev = self.x
+ self.J_prev = self.J
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+ self._update_hess()
+ else:
+ def update_x(x):
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+
+ self._update_x_impl = update_x
+
+ def _update_v(self, v):
+ if not np.array_equal(v, self.v):
+ self.v = v
+ self.H_updated = False
+
+ def _update_x(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x_impl(x)
+
+ def _update_fun(self):
+ if not self.f_updated:
+ self._update_fun_impl()
+ self.f_updated = True
+
+ def _update_jac(self):
+ if not self.J_updated:
+ self._update_jac_impl()
+ self.J_updated = True
+
+ def _update_hess(self):
+ if not self.H_updated:
+ self._update_hess_impl()
+ self.H_updated = True
+
+ def fun(self, x):
+ self._update_x(x)
+ self._update_fun()
+ return self.f
+
+ def jac(self, x):
+ self._update_x(x)
+ self._update_jac()
+ return self.J
+
+ def hess(self, x, v):
+ # v should be updated before x.
+ self._update_v(v)
+ self._update_x(x)
+ self._update_hess()
+ return self.H
+
+
+ class LinearVectorFunction:
+ """Linear vector function and its derivatives.
+
+ Defines a linear function F = A x, where x is an N-D vector and
+ A is an m-by-n matrix. The Jacobian is constant and equal to A. The Hessian
+ is identically zero and it is returned as a csr matrix.
+ """
+ def __init__(self, A, x0, sparse_jacobian):
+ if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
+ self.J = sps.csr_matrix(A)
+ self.sparse_jacobian = True
+ elif sps.issparse(A):
+ self.J = A.toarray()
+ self.sparse_jacobian = False
+ else:
+ # np.asarray makes sure A is ndarray and not matrix
+ self.J = np.atleast_2d(np.asarray(A))
+ self.sparse_jacobian = False
+
+ self.m, self.n = self.J.shape
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+
+ self.f = self.J.dot(self.x)
+ self.f_updated = True
+
+ self.v = np.zeros(self.m, dtype=float)
+ self.H = sps.csr_matrix((self.n, self.n))
+
+ def _update_x(self, x):
+ if not np.array_equal(x, self.x):
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+
+ def fun(self, x):
+ self._update_x(x)
+ if not self.f_updated:
+ self.f = self.J.dot(x)
+ self.f_updated = True
+ return self.f
+
+ def jac(self, x):
+ self._update_x(x)
+ return self.J
+
+ def hess(self, x, v):
+ self._update_x(x)
+ self.v = v
+ return self.H
+
+
+ class IdentityVectorFunction(LinearVectorFunction):
+ """Identity vector function and its derivatives.
+
+ The Jacobian is the identity matrix, returned as a dense array when
+ `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
+ identically zero and it is returned as a csr matrix.
+ """
+ def __init__(self, x0, sparse_jacobian):
+ n = len(x0)
+ if sparse_jacobian or sparse_jacobian is None:
+ A = sps.eye(n, format='csr')
+ sparse_jacobian = True
+ else:
+ A = np.eye(n)
+ sparse_jacobian = False
+ super().__init__(A, x0, sparse_jacobian)
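The memoization contract described in the `ScalarFunction` notes can be seen directly by counting evaluations. Below is a minimal sketch; these classes are private helpers, so the import path and constructor arguments simply mirror the `__init__` signature in the file above, and the specific numbers are illustrative.

import numpy as np
from scipy.optimize import BFGS
from scipy.optimize._differentiable_functions import ScalarFunction

def f(x):
    return float(np.sum(x**2))

# A finite-difference gradient must be paired with a quasi-Newton
# Hessian, as enforced by the ValueError in __init__ above.
sf = ScalarFunction(f, x0=np.array([1.0, 2.0]), args=(),
                    grad='2-point', hess=BFGS(),
                    finite_diff_rel_step=None,
                    finite_diff_bounds=(-np.inf, np.inf))

x = np.array([3.0, 4.0])
sf.fun(x)    # evaluates f at x and caches the result
sf.fun(x)    # same x: served from the cache, nfev unchanged
sf.grad(x)   # the 2-point differences reuse the cached f(x) as f0
print(sf.nfev, sf.ngev)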
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py ADDED
@@ -0,0 +1,1951 @@
1
+ """
2
+ differential_evolution: The differential evolution global optimization algorithm
3
+ Added by Andrew Nelson 2014
4
+ """
5
+ import warnings
6
+
7
+ import numpy as np
8
+ from scipy.optimize import OptimizeResult, minimize
9
+ from scipy.optimize._optimize import _status_message, _wrap_callback
10
+ from scipy._lib._util import (check_random_state, MapWrapper, _FunctionWrapper,
11
+ rng_integers)
12
+
13
+ from scipy.optimize._constraints import (Bounds, new_bounds_to_old,
14
+ NonlinearConstraint, LinearConstraint)
15
+ from scipy.sparse import issparse
16
+
17
+ __all__ = ['differential_evolution']
18
+
19
+
20
+ _MACHEPS = np.finfo(np.float64).eps
21
+
22
+
23
+ def differential_evolution(func, bounds, args=(), strategy='best1bin',
24
+ maxiter=1000, popsize=15, tol=0.01,
25
+ mutation=(0.5, 1), recombination=0.7, seed=None,
26
+ callback=None, disp=False, polish=True,
27
+ init='latinhypercube', atol=0, updating='immediate',
28
+ workers=1, constraints=(), x0=None, *,
29
+ integrality=None, vectorized=False):
30
+ """Finds the global minimum of a multivariate function.
31
+
32
+ The differential evolution method [1]_ is stochastic in nature. It does
33
+ not use gradient methods to find the minimum, and can search large areas
34
+ of candidate space, but often requires larger numbers of function
35
+ evaluations than conventional gradient-based techniques.
36
+
37
+ The algorithm is due to Storn and Price [2]_.
38
+
39
+ Parameters
40
+ ----------
41
+ func : callable
42
+ The objective function to be minimized. Must be in the form
43
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
44
+ and ``args`` is a tuple of any additional fixed parameters needed to
45
+ completely specify the function. The number of parameters, N, is equal
46
+ to ``len(x)``.
47
+ bounds : sequence or `Bounds`
48
+ Bounds for variables. There are two ways to specify the bounds:
49
+
50
+ 1. Instance of `Bounds` class.
51
+ 2. ``(min, max)`` pairs for each element in ``x``, defining the
52
+ finite lower and upper bounds for the optimizing argument of
53
+ `func`.
54
+
55
+ The total number of bounds is used to determine the number of
56
+ parameters, N. If there are parameters whose bounds are equal the total
57
+ number of free parameters is ``N - N_equal``.
58
+
59
+ args : tuple, optional
60
+ Any additional fixed parameters needed to
61
+ completely specify the objective function.
62
+ strategy : {str, callable}, optional
63
+ The differential evolution strategy to use. Should be one of:
64
+
65
+ - 'best1bin'
66
+ - 'best1exp'
67
+ - 'rand1bin'
68
+ - 'rand1exp'
69
+ - 'rand2bin'
70
+ - 'rand2exp'
71
+ - 'randtobest1bin'
72
+ - 'randtobest1exp'
73
+ - 'currenttobest1bin'
74
+ - 'currenttobest1exp'
75
+ - 'best2exp'
76
+ - 'best2bin'
77
+
78
+ The default is 'best1bin'. Strategies that may be implemented are
79
+ outlined in 'Notes'.
80
+ Alternatively the differential evolution strategy can be customized by
81
+ providing a callable that constructs a trial vector. The callable must
82
+ have the form ``strategy(candidate: int, population: np.ndarray, rng=None)``,
83
+ where ``candidate`` is an integer specifying which entry of the
84
+ population is being evolved, ``population`` is an array of shape
85
+ ``(S, N)`` containing all the population members (where S is the
86
+ total population size), and ``rng`` is the random number generator
87
+ being used within the solver.
88
+ ``candidate`` will be in the range ``[0, S)``.
89
+ ``strategy`` must return a trial vector with shape `(N,)`. The
90
+ fitness of this trial vector is compared against the fitness of
91
+ ``population[candidate]``.
92
+
93
+ .. versionchanged:: 1.12.0
94
+ Customization of evolution strategy via a callable.
95
+
96
+ maxiter : int, optional
97
+ The maximum number of generations over which the entire population is
98
+ evolved. The maximum number of function evaluations (with no polishing)
99
+ is: ``(maxiter + 1) * popsize * (N - N_equal)``
100
+ popsize : int, optional
101
+ A multiplier for setting the total population size. The population has
102
+ ``popsize * (N - N_equal)`` individuals. This keyword is overridden if
103
+ an initial population is supplied via the `init` keyword. When using
104
+ ``init='sobol'`` the population size is calculated as the next power
105
+ of 2 after ``popsize * (N - N_equal)``.
106
+ tol : float, optional
107
+ Relative tolerance for convergence, the solving stops when
108
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
109
+ where and `atol` and `tol` are the absolute and relative tolerance
110
+ respectively.
111
+ mutation : float or tuple(float, float), optional
112
+ The mutation constant. In the literature this is also known as
113
+ differential weight, being denoted by F.
114
+ If specified as a float it should be in the range [0, 2].
115
+ If specified as a tuple ``(min, max)`` dithering is employed. Dithering
116
+ randomly changes the mutation constant on a generation by generation
117
+ basis. The mutation constant for that generation is taken from
118
+ ``U[min, max)``. Dithering can help speed convergence significantly.
119
+ Increasing the mutation constant increases the search radius, but will
120
+ slow down convergence.
121
+ recombination : float, optional
122
+ The recombination constant, should be in the range [0, 1]. In the
123
+ literature this is also known as the crossover probability, being
124
+ denoted by CR. Increasing this value allows a larger number of mutants
125
+ to progress into the next generation, but at the risk of population
126
+ stability.
127
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
128
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
129
+ singleton is used.
130
+ If `seed` is an int, a new ``RandomState`` instance is used,
131
+ seeded with `seed`.
132
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
133
+ that instance is used.
134
+ Specify `seed` for repeatable minimizations.
135
+ disp : bool, optional
136
+ Prints the evaluated `func` at every iteration.
137
+ callback : callable, optional
138
+ A callable called after each iteration. Has the signature:
139
+
140
+ ``callback(intermediate_result: OptimizeResult)``
141
+
142
+ where ``intermediate_result`` is a keyword parameter containing an
143
+ `OptimizeResult` with attributes ``x`` and ``fun``, the best solution
144
+ found so far and the objective function. Note that the name
145
+ of the parameter must be ``intermediate_result`` for the callback
146
+ to be passed an `OptimizeResult`.
147
+
148
+ The callback also supports a signature like:
149
+
150
+ ``callback(x, convergence: float=val)``
151
+
152
+ ``val`` represents the fractional value of the population convergence.
153
+ When ``val`` is greater than ``1.0``, the function halts.
154
+
155
+ Introspection is used to determine which of the signatures is invoked.
156
+
157
+ Global minimization will halt if the callback raises ``StopIteration``
158
+ or returns ``True``; any polishing is still carried out.
159
+
160
+ .. versionchanged:: 1.12.0
161
+ callback accepts the ``intermediate_result`` keyword.
162
+
163
+ polish : bool, optional
164
+ If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
165
+ method is used to polish the best population member at the end, which
166
+ can improve the minimization slightly. If a constrained problem is
167
+ being studied then the `trust-constr` method is used instead. For large
168
+ problems with many constraints, polishing can take a long time due to
169
+ the Jacobian computations.
170
+ init : str or array-like, optional
171
+ Specify which type of population initialization is performed. Should be
172
+ one of:
173
+
174
+ - 'latinhypercube'
175
+ - 'sobol'
176
+ - 'halton'
177
+ - 'random'
178
+ - array specifying the initial population. The array should have
179
+ shape ``(S, N)``, where S is the total population size and N is
180
+ the number of parameters.
181
+ `init` is clipped to `bounds` before use.
182
+
183
+ The default is 'latinhypercube'. Latin Hypercube sampling tries to
184
+ maximize coverage of the available parameter space.
185
+
186
+ 'sobol' and 'halton' are superior alternatives and maximize even more
187
+ the parameter space. 'sobol' will enforce an initial population
188
+ size which is calculated as the next power of 2 after
189
+ ``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
190
+ less efficient. See `scipy.stats.qmc` for more details.
191
+
192
+ 'random' initializes the population randomly - this has the drawback
193
+ that clustering can occur, preventing the whole of parameter space
194
+ being covered. Use of an array to specify a population could be used,
195
+ for example, to create a tight bunch of initial guesses in an location
196
+ where the solution is known to exist, thereby reducing time for
197
+ convergence.
198
+ atol : float, optional
199
+ Absolute tolerance for convergence, the solving stops when
200
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
201
+ where and `atol` and `tol` are the absolute and relative tolerance
202
+ respectively.
203
+ updating : {'immediate', 'deferred'}, optional
204
+ If ``'immediate'``, the best solution vector is continuously updated
205
+ within a single generation [4]_. This can lead to faster convergence as
206
+ trial vectors can take advantage of continuous improvements in the best
207
+ solution.
208
+ With ``'deferred'``, the best solution vector is updated once per
209
+ generation. Only ``'deferred'`` is compatible with parallelization or
210
+ vectorization, and the `workers` and `vectorized` keywords can
211
+ over-ride this option.
212
+
213
+ .. versionadded:: 1.2.0
214
+
215
+ workers : int or map-like callable, optional
216
+ If `workers` is an int the population is subdivided into `workers`
217
+ sections and evaluated in parallel
218
+ (uses `multiprocessing.Pool <multiprocessing>`).
219
+ Supply -1 to use all available CPU cores.
220
+ Alternatively supply a map-like callable, such as
221
+ `multiprocessing.Pool.map` for evaluating the population in parallel.
222
+ This evaluation is carried out as ``workers(func, iterable)``.
223
+ This option will override the `updating` keyword to
224
+ ``updating='deferred'`` if ``workers != 1``.
225
+ This option overrides the `vectorized` keyword if ``workers != 1``.
226
+ Requires that `func` be pickleable.
227
+
228
+ .. versionadded:: 1.2.0
229
+
230
+ constraints : {NonLinearConstraint, LinearConstraint, Bounds}
231
+ Constraints on the solver, over and above those applied by the `bounds`
232
+ kwd. Uses the approach by Lampinen [5]_.
233
+
234
+ .. versionadded:: 1.4.0
235
+
236
+ x0 : None or array-like, optional
237
+ Provides an initial guess to the minimization. Once the population has
238
+ been initialized this vector replaces the first (best) member. This
239
+ replacement is done even if `init` is given an initial population.
240
+ ``x0.shape == (N,)``.
241
+
242
+ .. versionadded:: 1.7.0
243
+
244
+ integrality : 1-D array, optional
245
+ For each decision variable, a boolean value indicating whether the
246
+ decision variable is constrained to integer values. The array is
247
+ broadcast to ``(N,)``.
248
+ If any decision variables are constrained to be integral, they will not
249
+ be changed during polishing.
250
+ Only integer values lying between the lower and upper bounds are used.
251
+ If there are no integer values lying between the bounds then a
252
+ `ValueError` is raised.
253
+
254
+ .. versionadded:: 1.9.0
255
+
256
+ vectorized : bool, optional
257
+ If ``vectorized is True``, `func` is sent an `x` array with
258
+ ``x.shape == (N, S)``, and is expected to return an array of shape
259
+ ``(S,)``, where `S` is the number of solution vectors to be calculated.
260
+ If constraints are applied, each of the functions used to construct
261
+ a `Constraint` object should accept an `x` array with
262
+ ``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
263
+ `M` is the number of constraint components.
264
+ This option is an alternative to the parallelization offered by
265
+ `workers`, and may help in optimization speed by reducing interpreter
266
+ overhead from multiple function calls. This keyword is ignored if
267
+ ``workers != 1``.
268
+ This option will override the `updating` keyword to
269
+ ``updating='deferred'``.
270
+ See the notes section for further discussion on when to use
271
+ ``'vectorized'``, and when to use ``'workers'``.
272
+
273
+ .. versionadded:: 1.9.0
274
+
275
+ Returns
276
+ -------
277
+ res : OptimizeResult
278
+ The optimization result represented as a `OptimizeResult` object.
279
+ Important attributes are: ``x`` the solution array, ``success`` a
280
+ Boolean flag indicating if the optimizer exited successfully,
281
+ ``message`` which describes the cause of the termination,
282
+ ``population`` the solution vectors present in the population, and
283
+ ``population_energies`` the value of the objective function for each
284
+ entry in ``population``.
285
+ See `OptimizeResult` for a description of other attributes. If `polish`
286
+ was employed, and a lower minimum was obtained by the polishing, then
287
+ OptimizeResult also contains the ``jac`` attribute.
288
+ If the eventual solution does not satisfy the applied constraints
289
+ ``success`` will be `False`.
290
+
291
+ Notes
292
+ -----
293
+ Differential evolution is a stochastic population based method that is
294
+ useful for global optimization problems. At each pass through the
295
+ population the algorithm mutates each candidate solution by mixing with
296
+ other candidate solutions to create a trial candidate. There are several
297
+ strategies [3]_ for creating trial candidates, which suit some problems
298
+ more than others. The 'best1bin' strategy is a good starting point for
299
+ many systems. In this strategy two members of the population are randomly
300
+ chosen. Their difference is used to mutate the best member (the 'best' in
301
+ 'best1bin'), :math:`x_0`, so far:
302
+
303
+ .. math::
304
+
305
+ b' = x_0 + mutation * (x_{r_0} - x_{r_1})
306
+
307
+ A trial vector is then constructed. Starting with a randomly chosen ith
308
+ parameter the trial is sequentially filled (in modulo) with parameters
309
+ from ``b'`` or the original candidate. The choice of whether to use ``b'``
310
+ or the original candidate is made with a binomial distribution (the 'bin'
311
+ in 'best1bin') - a random number in [0, 1) is generated. If this number is
312
+ less than the `recombination` constant then the parameter is loaded from
313
+ ``b'``, otherwise it is loaded from the original candidate. The final
314
+ parameter is always loaded from ``b'``. Once the trial candidate is built
315
+ its fitness is assessed. If the trial is better than the original candidate
316
+ then it takes its place. If it is also better than the best overall
317
+ candidate it also replaces that.
318
+
319
+ The other strategies available are outlined in Qiang and
320
+ Mitchell (2014) [3]_.
321
+
322
+ .. math::
323
+ rand1* : b' = x_{r_0} + mutation*(x_{r_1} - x_{r_2})
324
+
325
+ rand2* : b' = x_{r_0} + mutation*(x_{r_1} + x_{r_2}
326
+ - x_{r_3} - x_{r_4})
327
+
328
+ best1* : b' = x_0 + mutation*(x_{r_0} - x_{r_1})
329
+
330
+ best2* : b' = x_0 + mutation*(x_{r_0} + x_{r_1}
331
+ - x_{r_2} - x_{r_3})
332
+
333
+ currenttobest1* : b' = x_i + mutation*(x_0 - x_i
334
+ + x_{r_0} - x_{r_1})
335
+
336
+ randtobest1* : b' = x_{r_0} + mutation*(x_0 - x_{r_0}
337
+ + x_{r_1} - x_{r_2})
338
+
339
+ where the integers :math:`r_0, r_1, r_2, r_3, r_4` are chosen randomly
340
+ from the interval [0, NP) with `NP` being the total population size and
341
+ the original candidate having index `i`. The user can fully customize the
342
+ generation of the trial candidates by supplying a callable to ``strategy``.
343
+
344
+ To improve your chances of finding a global minimum use higher `popsize`
345
+ values, with higher `mutation` and (dithering), but lower `recombination`
346
+ values. This has the effect of widening the search radius, but slowing
347
+ convergence.
348
+
349
+ By default the best solution vector is updated continuously within a single
350
+ iteration (``updating='immediate'``). This is a modification [4]_ of the
351
+ original differential evolution algorithm which can lead to faster
352
+ convergence as trial vectors can immediately benefit from improved
353
+ solutions. To use the original Storn and Price behaviour, updating the best
354
+ solution once per iteration, set ``updating='deferred'``.
355
+ The ``'deferred'`` approach is compatible with both parallelization and
356
+ vectorization (``'workers'`` and ``'vectorized'`` keywords). These may
357
+ improve minimization speed by using computer resources more efficiently.
358
+ The ``'workers'`` distribute calculations over multiple processors. By
359
+ default the Python `multiprocessing` module is used, but other approaches
360
+ are also possible, such as the Message Passing Interface (MPI) used on
361
+ clusters [6]_ [7]_. The overhead from these approaches (creating new
362
+ Processes, etc) may be significant, meaning that computational speed
363
+ doesn't necessarily scale with the number of processors used.
364
+ Parallelization is best suited to computationally expensive objective
365
+ functions. If the objective function is less expensive, then
366
+ ``'vectorized'`` may aid by only calling the objective function once per
367
+ iteration, rather than multiple times for all the population members; the
368
+ interpreter overhead is reduced.
369
+
370
+ .. versionadded:: 0.15.0
371
+
372
+ References
373
+ ----------
374
+ .. [1] Differential evolution, Wikipedia,
375
+ http://en.wikipedia.org/wiki/Differential_evolution
376
+ .. [2] Storn, R and Price, K, Differential Evolution - a Simple and
377
+ Efficient Heuristic for Global Optimization over Continuous Spaces,
378
+ Journal of Global Optimization, 1997, 11, 341 - 359.
379
+ .. [3] Qiang, J., Mitchell, C., A Unified Differential Evolution Algorithm
380
+ for Global Optimization, 2014, https://www.osti.gov/servlets/purl/1163659
381
+ .. [4] Wormington, M., Panaccione, C., Matney, K. M., Bowen, D. K.,
382
+ Characterization of structures from X-ray scattering data using
383
+ genetic algorithms, Phil. Trans. R. Soc. Lond. A, 1999, 357,
384
+ 2827-2848
385
+ .. [5] Lampinen, J., A constraint handling approach for the differential
386
+ evolution algorithm. Proceedings of the 2002 Congress on
387
+ Evolutionary Computation. CEC'02 (Cat. No. 02TH8600). Vol. 2. IEEE,
388
+ 2002.
389
+ .. [6] https://mpi4py.readthedocs.io/en/stable/
390
+ .. [7] https://schwimmbad.readthedocs.io/en/latest/
391
+
392
+
393
+ Examples
394
+ --------
395
+ Let us consider the problem of minimizing the Rosenbrock function. This
396
+ function is implemented in `rosen` in `scipy.optimize`.
397
+
398
+ >>> import numpy as np
399
+ >>> from scipy.optimize import rosen, differential_evolution
400
+ >>> bounds = [(0,2), (0, 2), (0, 2), (0, 2), (0, 2)]
401
+ >>> result = differential_evolution(rosen, bounds)
402
+ >>> result.x, result.fun
403
+ (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)
404
+
405
+ Now repeat, but with parallelization.
406
+
407
+ >>> result = differential_evolution(rosen, bounds, updating='deferred',
408
+ ... workers=2)
409
+ >>> result.x, result.fun
410
+ (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)
411
+
412
+ Let's do a constrained minimization.
413
+
414
+ >>> from scipy.optimize import LinearConstraint, Bounds
415
+
416
+ We add the constraint that the sum of ``x[0]`` and ``x[1]`` must be less
417
+ than or equal to 1.9. This is a linear constraint, which may be written
418
+ ``A @ x <= 1.9``, where ``A = array([[1, 1]])``. This can be encoded as
419
+ a `LinearConstraint` instance:
420
+
421
+ >>> lc = LinearConstraint([[1, 1]], -np.inf, 1.9)
422
+
423
+ Specify limits using a `Bounds` object.
424
+
425
+ >>> bounds = Bounds([0., 0.], [2., 2.])
426
+ >>> result = differential_evolution(rosen, bounds, constraints=lc,
427
+ ... seed=1)
428
+ >>> result.x, result.fun
429
+ (array([0.96632622, 0.93367155]), 0.0011352416852625719)
430
+
431
+ Next find the minimum of the Ackley function
432
+ (https://en.wikipedia.org/wiki/Test_functions_for_optimization).
433
+
434
+ >>> def ackley(x):
435
+ ... arg1 = -0.2 * np.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2))
436
+ ... arg2 = 0.5 * (np.cos(2. * np.pi * x[0]) + np.cos(2. * np.pi * x[1]))
437
+ ... return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e
438
+ >>> bounds = [(-5, 5), (-5, 5)]
439
+ >>> result = differential_evolution(ackley, bounds, seed=1)
440
+ >>> result.x, result.fun
441
+ (array([0., 0.]), 4.440892098500626e-16)
442
+
443
+ The Ackley function is written in a vectorized manner, so the
444
+ ``'vectorized'`` keyword can be employed. Note the reduced number of
445
+ function evaluations.
446
+
447
+ >>> result = differential_evolution(
448
+ ... ackley, bounds, vectorized=True, updating='deferred', seed=1
449
+ ... )
450
+ >>> result.x, result.fun
451
+ (array([0., 0.]), 4.440892098500626e-16)
452
+
453
+ The following custom strategy function mimics 'best1bin':
454
+
455
+ >>> def custom_strategy_fn(candidate, population, rng=None):
456
+ ...     parameter_count = population.shape[-1]
457
+ ... mutation, recombination = 0.7, 0.9
458
+ ... trial = np.copy(population[candidate])
459
+ ... fill_point = rng.choice(parameter_count)
460
+ ...
461
+ ... pool = np.arange(len(population))
462
+ ... rng.shuffle(pool)
463
+ ...
464
+ ...     # select two distinct random indices, neither equal
465
+ ...     # to candidate.
466
+ ... idxs = []
467
+ ... while len(idxs) < 2 and len(pool) > 0:
468
+ ... idx = pool[0]
469
+ ... pool = pool[1:]
470
+ ... if idx != candidate:
471
+ ... idxs.append(idx)
472
+ ...
473
+ ... r0, r1 = idxs[:2]
474
+ ...
475
+ ... bprime = (population[0] + mutation *
476
+ ... (population[r0] - population[r1]))
477
+ ...
478
+ ... crossovers = rng.uniform(size=parameter_count)
479
+ ... crossovers = crossovers < recombination
480
+ ... crossovers[fill_point] = True
481
+ ... trial = np.where(crossovers, bprime, trial)
482
+ ... return trial
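+
+ The customized strategy can then be supplied in place of a string (using
+ the ``ackley`` function and ``bounds`` defined above):
+
+ >>> result = differential_evolution(ackley, bounds,
+ ...                                 strategy=custom_strategy_fn, seed=1)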
483
+
484
+ """
485
+
486
+ # using a context manager means that any created Pool objects are
487
+ # cleared up.
488
+ with DifferentialEvolutionSolver(func, bounds, args=args,
489
+ strategy=strategy,
490
+ maxiter=maxiter,
491
+ popsize=popsize, tol=tol,
492
+ mutation=mutation,
493
+ recombination=recombination,
494
+ seed=seed, polish=polish,
495
+ callback=callback,
496
+ disp=disp, init=init, atol=atol,
497
+ updating=updating,
498
+ workers=workers,
499
+ constraints=constraints,
500
+ x0=x0,
501
+ integrality=integrality,
502
+ vectorized=vectorized) as solver:
503
+ ret = solver.solve()
504
+
505
+ return ret
506
+
507
+
508
+ class DifferentialEvolutionSolver:
509
+
510
+ """This class implements the differential evolution solver
511
+
512
+ Parameters
513
+ ----------
514
+ func : callable
515
+ The objective function to be minimized. Must be in the form
516
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
517
+ and ``args`` is a tuple of any additional fixed parameters needed to
518
+ completely specify the function. The number of parameters, N, is equal
519
+ to ``len(x)``.
520
+ bounds : sequence or `Bounds`
521
+ Bounds for variables. There are two ways to specify the bounds:
522
+
523
+ 1. Instance of `Bounds` class.
524
+ 2. ``(min, max)`` pairs for each element in ``x``, defining the
525
+ finite lower and upper bounds for the optimizing argument of
526
+ `func`.
527
+
528
+ The total number of bounds is used to determine the number of
529
+ parameters, N. If there are parameters whose bounds are equal the total
530
+ number of free parameters is ``N - N_equal``.
531
+ args : tuple, optional
532
+ Any additional fixed parameters needed to
533
+ completely specify the objective function.
534
+ strategy : {str, callable}, optional
535
+ The differential evolution strategy to use. Should be one of:
536
+
537
+ - 'best1bin'
538
+ - 'best1exp'
539
+ - 'rand1bin'
540
+ - 'rand1exp'
541
+ - 'rand2bin'
542
+ - 'rand2exp'
543
+ - 'randtobest1bin'
544
+ - 'randtobest1exp'
545
+ - 'currenttobest1bin'
546
+ - 'currenttobest1exp'
547
+ - 'best2exp'
548
+ - 'best2bin'
549
+
550
+ The default is 'best1bin'. Strategies that may be
551
+ implemented are outlined in 'Notes'.
552
+
553
+ Alternatively the differential evolution strategy can be customized
554
+ by providing a callable that constructs a trial vector. The callable
555
+ must have the form
556
+ ``strategy(candidate: int, population: np.ndarray, rng=None)``,
557
+ where ``candidate`` is an integer specifying which entry of the
558
+ population is being evolved, ``population`` is an array of shape
559
+ ``(S, N)`` containing all the population members (where S is the
560
+ total population size), and ``rng`` is the random number generator
561
+ being used within the solver.
562
+ ``candidate`` will be in the range ``[0, S)``.
563
+ ``strategy`` must return a trial vector with shape `(N,)`. The
564
+ fitness of this trial vector is compared against the fitness of
565
+ ``population[candidate]``.
566
+ maxiter : int, optional
567
+ The maximum number of generations over which the entire population is
568
+ evolved. The maximum number of function evaluations (with no polishing)
569
+ is: ``(maxiter + 1) * popsize * (N - N_equal)``
570
+ popsize : int, optional
571
+ A multiplier for setting the total population size. The population has
572
+ ``popsize * (N - N_equal)`` individuals. This keyword is overridden if
573
+ an initial population is supplied via the `init` keyword. When using
574
+ ``init='sobol'`` the population size is calculated as the next power
575
+ of 2 after ``popsize * (N - N_equal)``.
576
+ tol : float, optional
577
+ Relative tolerance for convergence, the solving stops when
578
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
579
+ where `atol` and `tol` are the absolute and relative tolerance
580
+ respectively.
581
+ mutation : float or tuple(float, float), optional
582
+ The mutation constant. In the literature this is also known as
583
+ differential weight, being denoted by F.
584
+ If specified as a float it should be in the range [0, 2].
585
+ If specified as a tuple ``(min, max)`` dithering is employed. Dithering
586
+ randomly changes the mutation constant on a generation by generation
587
+ basis. The mutation constant for that generation is taken from
588
+ U[min, max). Dithering can help speed convergence significantly.
589
+ Increasing the mutation constant increases the search radius, but will
590
+ slow down convergence.
591
+ recombination : float, optional
592
+ The recombination constant, should be in the range [0, 1]. In the
593
+ literature this is also known as the crossover probability, being
594
+ denoted by CR. Increasing this value allows a larger number of mutants
595
+ to progress into the next generation, but at the risk of population
596
+ stability.
597
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
598
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
599
+ singleton is used.
600
+ If `seed` is an int, a new ``RandomState`` instance is used,
601
+ seeded with `seed`.
602
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
603
+ that instance is used.
604
+ Specify `seed` for repeatable minimizations.
605
+ disp : bool, optional
606
+ Prints the evaluated `func` at every iteration.
607
+ callback : callable, optional
608
+ A callable called after each iteration. Has the signature:
609
+
610
+ ``callback(intermediate_result: OptimizeResult)``
611
+
612
+ where ``intermediate_result`` is a keyword parameter containing an
613
+ `OptimizeResult` with attributes ``x`` and ``fun``, the best solution
614
+ found so far and the objective function. Note that the name
615
+ of the parameter must be ``intermediate_result`` for the callback
616
+ to be passed an `OptimizeResult`.
617
+
618
+ The callback also supports a signature like:
619
+
620
+ ``callback(x, convergence: float=val)``
621
+
622
+ ``val`` represents the fractional value of the population convergence.
623
+ When ``val`` is greater than ``1.0``, the function halts.
624
+
625
+ Introspection is used to determine which of the signatures is invoked.
626
+
627
+ Global minimization will halt if the callback raises ``StopIteration``
628
+ or returns ``True``; any polishing is still carried out.
629
+
630
+ .. versionchanged:: 1.12.0
631
+ callback accepts the ``intermediate_result`` keyword.
632
+
633
+ polish : bool, optional
634
+ If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
635
+ method is used to polish the best population member at the end, which
636
+ can improve the minimization slightly. If a constrained problem is
637
+ being studied then the `trust-constr` method is used instead. For large
638
+ problems with many constraints, polishing can take a long time due to
639
+ the Jacobian computations.
640
+ maxfun : int, optional
641
+ Set the maximum number of function evaluations. However, it probably
642
+ makes more sense to set `maxiter` instead.
643
+ init : str or array-like, optional
644
+ Specify which type of population initialization is performed. Should be
645
+ one of:
646
+
647
+ - 'latinhypercube'
648
+ - 'sobol'
649
+ - 'halton'
650
+ - 'random'
651
+ - array specifying the initial population. The array should have
652
+ shape ``(S, N)``, where S is the total population size and
653
+ N is the number of parameters.
654
+ `init` is clipped to `bounds` before use.
655
+
656
+ The default is 'latinhypercube'. Latin Hypercube sampling tries to
657
+ maximize coverage of the available parameter space.
658
+
659
+ 'sobol' and 'halton' are superior alternatives that maximize coverage of
660
+ the parameter space. 'sobol' will enforce an initial population
661
+ size which is calculated as the next power of 2 after
662
+ ``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
663
+ less efficient. See `scipy.stats.qmc` for more details.
664
+
665
+ 'random' initializes the population randomly - this has the drawback
666
+ that clustering can occur, preventing the whole of parameter space
667
+ being covered. Use of an array to specify a population could be used,
668
+ for example, to create a tight bunch of initial guesses in a location
669
+ where the solution is known to exist, thereby reducing time for
670
+ convergence.
671
+ atol : float, optional
672
+ Absolute tolerance for convergence, the solving stops when
673
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
674
+ where `atol` and `tol` are the absolute and relative tolerance
675
+ respectively.
676
+ updating : {'immediate', 'deferred'}, optional
677
+ If ``'immediate'``, the best solution vector is continuously updated
678
+ within a single generation [4]_. This can lead to faster convergence as
679
+ trial vectors can take advantage of continuous improvements in the best
680
+ solution.
681
+ With ``'deferred'``, the best solution vector is updated once per
682
+ generation. Only ``'deferred'`` is compatible with parallelization or
683
+ vectorization, and the `workers` and `vectorized` keywords can
684
+ over-ride this option.
685
+ workers : int or map-like callable, optional
686
+ If `workers` is an int the population is subdivided into `workers`
687
+ sections and evaluated in parallel
688
+ (uses `multiprocessing.Pool <multiprocessing>`).
689
+ Supply `-1` to use all cores available to the Process.
690
+ Alternatively supply a map-like callable, such as
691
+ `multiprocessing.Pool.map` for evaluating the population in parallel.
692
+ This evaluation is carried out as ``workers(func, iterable)``.
693
+ This option will override the `updating` keyword to
694
+ `updating='deferred'` if `workers != 1`.
695
+ Requires that `func` be pickleable.
696
+ constraints : {NonlinearConstraint, LinearConstraint, Bounds}
697
+ Constraints on the solver, over and above those applied by the `bounds`
698
+ kwd. Uses the approach by Lampinen.
699
+ x0 : None or array-like, optional
700
+ Provides an initial guess to the minimization. Once the population has
701
+ been initialized this vector replaces the first (best) member. This
702
+ replacement is done even if `init` is given an initial population.
703
+ ``x0.shape == (N,)``.
704
+ integrality : 1-D array, optional
705
+ For each decision variable, a boolean value indicating whether the
706
+ decision variable is constrained to integer values. The array is
707
+ broadcast to ``(N,)``.
708
+ If any decision variables are constrained to be integral, they will not
709
+ be changed during polishing.
710
+ Only integer values lying between the lower and upper bounds are used.
711
+ If there are no integer values lying between the bounds then a
712
+ `ValueError` is raised.
713
+ vectorized : bool, optional
714
+ If ``vectorized is True``, `func` is sent an `x` array with
715
+ ``x.shape == (N, S)``, and is expected to return an array of shape
716
+ ``(S,)``, where `S` is the number of solution vectors to be calculated.
717
+ If constraints are applied, each of the functions used to construct
718
+ a `Constraint` object should accept an `x` array with
719
+ ``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
720
+ `M` is the number of constraint components.
721
+ This option is an alternative to the parallelization offered by
722
+ `workers`, and may help in optimization speed. This keyword is
723
+ ignored if ``workers != 1``.
724
+ This option will override the `updating` keyword to
725
+ ``updating='deferred'``.
726
+ """
727
+
728
+ # Dispatch of mutation strategy method (binomial or exponential).
729
+ _binomial = {'best1bin': '_best1',
730
+ 'randtobest1bin': '_randtobest1',
731
+ 'currenttobest1bin': '_currenttobest1',
732
+ 'best2bin': '_best2',
733
+ 'rand2bin': '_rand2',
734
+ 'rand1bin': '_rand1'}
735
+ _exponential = {'best1exp': '_best1',
736
+ 'rand1exp': '_rand1',
737
+ 'randtobest1exp': '_randtobest1',
738
+ 'currenttobest1exp': '_currenttobest1',
739
+ 'best2exp': '_best2',
740
+ 'rand2exp': '_rand2'}
741
+
742
+ __init_error_msg = ("The population initialization method must be one of "
743
+ "'latinhypercube', 'sobol', 'halton' or 'random', or an "
744
+ "array of shape (S, N) where N is the number of "
+ "parameters and S>5")
745
+
746
+ def __init__(self, func, bounds, args=(),
747
+ strategy='best1bin', maxiter=1000, popsize=15,
748
+ tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=None,
749
+ maxfun=np.inf, callback=None, disp=False, polish=True,
750
+ init='latinhypercube', atol=0, updating='immediate',
751
+ workers=1, constraints=(), x0=None, *, integrality=None,
752
+ vectorized=False):
753
+
754
+ if callable(strategy):
755
+ # a callable strategy is going to be stored in self.strategy anyway
756
+ pass
757
+ elif strategy in self._binomial:
758
+ self.mutation_func = getattr(self, self._binomial[strategy])
759
+ elif strategy in self._exponential:
760
+ self.mutation_func = getattr(self, self._exponential[strategy])
761
+ else:
762
+ raise ValueError("Please select a valid mutation strategy")
763
+ self.strategy = strategy
764
+
765
+ self.callback = _wrap_callback(callback, "differential_evolution")
766
+ self.polish = polish
767
+
768
+ # set the updating / parallelisation options
769
+ if updating in ['immediate', 'deferred']:
770
+ self._updating = updating
771
+
772
+ self.vectorized = vectorized
773
+
774
+ # want to use parallelisation, but updating is immediate
775
+ if workers != 1 and updating == 'immediate':
776
+ warnings.warn("differential_evolution: the 'workers' keyword has"
777
+ " overridden updating='immediate' to"
778
+ " updating='deferred'", UserWarning, stacklevel=2)
779
+ self._updating = 'deferred'
780
+
781
+ if vectorized and workers != 1:
782
+ warnings.warn("differential_evolution: the 'workers' keyword"
783
+ " overrides the 'vectorized' keyword", stacklevel=2)
784
+ self.vectorized = vectorized = False
785
+
786
+ if vectorized and updating == 'immediate':
787
+ warnings.warn("differential_evolution: the 'vectorized' keyword"
788
+ " has overridden updating='immediate' to updating"
789
+ "='deferred'", UserWarning, stacklevel=2)
790
+ self._updating = 'deferred'
791
+
792
+ # an object with a map method.
793
+ if vectorized:
794
+ def maplike_for_vectorized_func(func, x):
795
+ # send an array (N, S) to the user func,
796
+ # expect to receive (S,). Transposition is required because
797
+ # internally the population is held as (S, N)
798
+ return np.atleast_1d(func(x.T))
799
+ workers = maplike_for_vectorized_func
800
+
801
+ self._mapwrapper = MapWrapper(workers)
802
+
803
+ # relative and absolute tolerances for convergence
804
+ self.tol, self.atol = tol, atol
805
+
806
+ # Mutation constant should be in [0, 2). If specified as a sequence
807
+ # then dithering is performed.
808
+ self.scale = mutation
809
+ if (not np.all(np.isfinite(mutation)) or
810
+ np.any(np.array(mutation) >= 2) or
811
+ np.any(np.array(mutation) < 0)):
812
+ raise ValueError('The mutation constant must be a float in '
813
+ 'U[0, 2), or specified as a tuple(min, max)'
814
+ ' where min < max and min, max are in U[0, 2).')
815
+
816
+ self.dither = None
817
+ if hasattr(mutation, '__iter__') and len(mutation) > 1:
818
+ self.dither = [mutation[0], mutation[1]]
819
+ self.dither.sort()
820
+
821
+ self.cross_over_probability = recombination
822
+
823
+ # we create a wrapped function to allow the use of map (and Pool.map
824
+ # in the future)
825
+ self.func = _FunctionWrapper(func, args)
826
+ self.args = args
827
+
828
+ # convert tuple of lower and upper bounds to limits
829
+ # [(low_0, high_0), ..., (low_n, high_n)]
830
+ # -> [[low_0, ..., low_n], [high_0, ..., high_n]]
831
+ if isinstance(bounds, Bounds):
832
+ self.limits = np.array(new_bounds_to_old(bounds.lb,
833
+ bounds.ub,
834
+ len(bounds.lb)),
835
+ dtype=float).T
836
+ else:
837
+ self.limits = np.array(bounds, dtype='float').T
838
+
839
+ if (np.size(self.limits, 0) != 2 or not
840
+ np.all(np.isfinite(self.limits))):
841
+ raise ValueError('bounds should be a sequence containing finite '
842
+ 'real valued (min, max) pairs for each value'
843
+ ' in x')
844
+
845
+ if maxiter is None: # the default used to be None
846
+ maxiter = 1000
847
+ self.maxiter = maxiter
848
+ if maxfun is None: # the default used to be None
849
+ maxfun = np.inf
850
+ self.maxfun = maxfun
851
+
852
+ # population is scaled to between [0, 1].
853
+ # We have to scale between parameter <-> population
854
+ # save these arguments for _scale_parameters and
855
+ # _unscale_parameters. This is an optimization
856
+ self.__scale_arg1 = 0.5 * (self.limits[0] + self.limits[1])
857
+ self.__scale_arg2 = np.fabs(self.limits[0] - self.limits[1])
858
+ with np.errstate(divide='ignore'):
859
+ # if lb == ub then the following line will be 1/0, which is why
860
+ # we ignore the divide by zero warning. The result from 1/0 is
861
+ # inf, so replace those values by 0.
862
+ self.__recip_scale_arg2 = 1 / self.__scale_arg2
863
+ self.__recip_scale_arg2[~np.isfinite(self.__recip_scale_arg2)] = 0
864
+
865
+ self.parameter_count = np.size(self.limits, 1)
866
+
867
+ self.random_number_generator = check_random_state(seed)
868
+
869
+ # Which parameters are going to be integers?
870
+ if np.any(integrality):
871
+ # user has provided a truth value for integer constraints
872
+ integrality = np.broadcast_to(
873
+ integrality,
874
+ self.parameter_count
875
+ )
876
+ integrality = np.asarray(integrality, bool)
877
+ # For integrality parameters change the limits to only allow
878
+ # integer values lying between the limits.
879
+ lb, ub = np.copy(self.limits)
880
+
881
+ lb = np.ceil(lb)
882
+ ub = np.floor(ub)
883
+ if not (lb[integrality] <= ub[integrality]).all():
884
+ # there's a parameter that doesn't have an integer value
885
+ # lying between the limits
886
+ raise ValueError("One of the integrality constraints does not"
887
+ " have any possible integer values between"
888
+ " the lower/upper bounds.")
889
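+ # widening each limit by just under 0.5 (np.nextafter keeps the
+ # interval open) lets the rounding in _scale_parameters reach lb and
+ # ub exactly, without ever rounding past them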
+ nlb = np.nextafter(lb[integrality] - 0.5, np.inf)
890
+ nub = np.nextafter(ub[integrality] + 0.5, -np.inf)
891
+
892
+ self.integrality = integrality
893
+ self.limits[0, self.integrality] = nlb
894
+ self.limits[1, self.integrality] = nub
895
+ else:
896
+ self.integrality = False
897
+
898
+ # check for equal bounds
899
+ eb = self.limits[0] == self.limits[1]
900
+ eb_count = np.count_nonzero(eb)
901
+
902
+ # default population initialization is a latin hypercube design, but
903
+ # there are other population initializations possible.
904
+ # the minimum is 5 because 'best2bin' requires a population that's at
905
+ # least 5 long
906
+ # 202301 - reduced population size to account for parameters with
907
+ # equal bounds. If there are no varying parameters set N to at least 1
908
+ self.num_population_members = max(
909
+ 5,
910
+ popsize * max(1, self.parameter_count - eb_count)
911
+ )
912
+ self.population_shape = (self.num_population_members,
913
+ self.parameter_count)
914
+
915
+ self._nfev = 0
916
+ # check str first, otherwise comparing a str with an array will fail
917
+ if isinstance(init, str):
918
+ if init == 'latinhypercube':
919
+ self.init_population_lhs()
920
+ elif init == 'sobol':
921
+ # must be Ns = 2**m for Sobol'
922
+ n_s = int(2 ** np.ceil(np.log2(self.num_population_members)))
923
+ self.num_population_members = n_s
924
+ self.population_shape = (self.num_population_members,
925
+ self.parameter_count)
926
+ self.init_population_qmc(qmc_engine='sobol')
927
+ elif init == 'halton':
928
+ self.init_population_qmc(qmc_engine='halton')
929
+ elif init == 'random':
930
+ self.init_population_random()
931
+ else:
932
+ raise ValueError(self.__init_error_msg)
933
+ else:
934
+ self.init_population_array(init)
935
+
936
+ if x0 is not None:
937
+ # scale to within unit interval and
938
+ # ensure parameters are within bounds.
939
+ x0_scaled = self._unscale_parameters(np.asarray(x0))
940
+ if ((x0_scaled > 1.0) | (x0_scaled < 0.0)).any():
941
+ raise ValueError(
942
+ "Some entries in x0 lay outside the specified bounds"
943
+ )
944
+ self.population[0] = x0_scaled
945
+
946
+ # infrastructure for constraints
947
+ self.constraints = constraints
948
+ self._wrapped_constraints = []
949
+
950
+ if hasattr(constraints, '__len__'):
951
+ # a sequence of constraints; this also handles the default
952
+ # keyword parameter (an empty tuple)
953
+ for c in constraints:
954
+ self._wrapped_constraints.append(
955
+ _ConstraintWrapper(c, self.x)
956
+ )
957
+ else:
958
+ self._wrapped_constraints = [
959
+ _ConstraintWrapper(constraints, self.x)
960
+ ]
961
+ self.total_constraints = np.sum(
962
+ [c.num_constr for c in self._wrapped_constraints]
963
+ )
964
+ self.constraint_violation = np.zeros((self.num_population_members, 1))
965
+ self.feasible = np.ones(self.num_population_members, bool)
966
+
967
+ # an array to shuffle when selecting candidates. Create it here
968
+ # rather than repeatedly creating it in _select_samples.
969
+ self._random_population_index = np.arange(self.num_population_members)
970
+ self.disp = disp
971
+
972
+ def init_population_lhs(self):
973
+ """
974
+ Initializes the population with Latin Hypercube Sampling.
975
+ Latin Hypercube Sampling ensures that each parameter is uniformly
976
+ sampled over its range.
977
+ """
978
+ rng = self.random_number_generator
979
+
980
+ # Each parameter range needs to be sampled uniformly. The scaled
981
+ # parameter range ([0, 1)) needs to be split into
982
+ # `self.num_population_members` segments, each of which has the following
983
+ # size:
984
+ segsize = 1.0 / self.num_population_members
985
+
986
+ # Within each segment we sample from a uniform random distribution.
987
+ # We need to do this sampling for each parameter.
988
+ samples = (segsize * rng.uniform(size=self.population_shape)
989
+
990
+ # Offset each segment to cover the entire parameter range [0, 1)
991
+ + np.linspace(0., 1., self.num_population_members,
992
+ endpoint=False)[:, np.newaxis])
993
+
994
+ # Create an array for population of candidate solutions.
995
+ self.population = np.zeros_like(samples)
996
+
997
+ # Initialize population of candidate solutions by permutation of the
998
+ # random samples.
999
+ for j in range(self.parameter_count):
1000
+ order = rng.permutation(range(self.num_population_members))
1001
+ self.population[:, j] = samples[order, j]
1002
+
1003
+ # reset population energies
1004
+ self.population_energies = np.full(self.num_population_members,
1005
+ np.inf)
1006
+
1007
+ # reset number of function evaluations counter
1008
+ self._nfev = 0
1009
+
1010
+ def init_population_qmc(self, qmc_engine):
1011
+ """Initializes the population with a QMC method.
1012
+
1013
+ QMC methods ensure that each parameter is uniformly
1014
+ sampled over its range.
1015
+
1016
+ Parameters
1017
+ ----------
1018
+ qmc_engine : str
1019
+ The QMC method to use for initialization. Can be one of
1020
+ ``latinhypercube``, ``sobol`` or ``halton``.
1021
+
1022
+ """
1023
+ from scipy.stats import qmc
1024
+
1025
+ rng = self.random_number_generator
1026
+
1027
+ # Create an array for population of candidate solutions.
1028
+ if qmc_engine == 'latinhypercube':
1029
+ sampler = qmc.LatinHypercube(d=self.parameter_count, seed=rng)
1030
+ elif qmc_engine == 'sobol':
1031
+ sampler = qmc.Sobol(d=self.parameter_count, seed=rng)
1032
+ elif qmc_engine == 'halton':
1033
+ sampler = qmc.Halton(d=self.parameter_count, seed=rng)
1034
+ else:
1035
+ raise ValueError(self.__init_error_msg)
1036
+
1037
+ self.population = sampler.random(n=self.num_population_members)
1038
+
1039
+ # reset population energies
1040
+ self.population_energies = np.full(self.num_population_members,
1041
+ np.inf)
1042
+
1043
+ # reset number of function evaluations counter
1044
+ self._nfev = 0
1045
+
1046
+ def init_population_random(self):
1047
+ """
1048
+ Initializes the population at random. This type of initialization
1049
+ can possess clustering; Latin Hypercube sampling is generally better.
1050
+ """
1051
+ rng = self.random_number_generator
1052
+ self.population = rng.uniform(size=self.population_shape)
1053
+
1054
+ # reset population energies
1055
+ self.population_energies = np.full(self.num_population_members,
1056
+ np.inf)
1057
+
1058
+ # reset number of function evaluations counter
1059
+ self._nfev = 0
1060
+
1061
+ def init_population_array(self, init):
1062
+ """
1063
+ Initializes the population with a user specified population.
1064
+
1065
+ Parameters
1066
+ ----------
1067
+ init : np.ndarray
1068
+ Array specifying a subset of the initial population. The array should
1069
+ have shape (S, N), where N is the number of parameters.
1070
+ The population is clipped to the lower and upper bounds.
1071
+ """
1072
+ # make sure you're using a float array
1073
+ popn = np.asarray(init, dtype=np.float64)
1074
+
1075
+ if (np.size(popn, 0) < 5 or
1076
+ popn.shape[1] != self.parameter_count or
1077
+ len(popn.shape) != 2):
1078
+ raise ValueError("The population supplied needs to have shape"
1079
+ " (S, len(x)), where S > 4.")
1080
+
1081
+ # scale values and clip to bounds, assigning to population
1082
+ self.population = np.clip(self._unscale_parameters(popn), 0, 1)
1083
+
1084
+ self.num_population_members = np.size(self.population, 0)
1085
+
1086
+ self.population_shape = (self.num_population_members,
1087
+ self.parameter_count)
1088
+
1089
+ # reset population energies
1090
+ self.population_energies = np.full(self.num_population_members,
1091
+ np.inf)
1092
+
1093
+ # reset number of function evaluations counter
1094
+ self._nfev = 0
1095
+
1096
+ @property
1097
+ def x(self):
1098
+ """
1099
+ The best solution from the solver
1100
+ """
1101
+ return self._scale_parameters(self.population[0])
1102
+
1103
+ @property
1104
+ def convergence(self):
1105
+ """
1106
+ The standard deviation of the population energies divided by their
1107
+ mean.
1108
+ """
1109
+ if np.any(np.isinf(self.population_energies)):
1110
+ return np.inf
1111
+ return (np.std(self.population_energies) /
1112
+ (np.abs(np.mean(self.population_energies)) + _MACHEPS))
1113
+
1114
+ def converged(self):
1115
+ """
1116
+ Return True if the solver has converged.
1117
+ """
1118
+ if np.any(np.isinf(self.population_energies)):
1119
+ return False
1120
+
1121
+ return (np.std(self.population_energies) <=
1122
+ self.atol +
1123
+ self.tol * np.abs(np.mean(self.population_energies)))
1124
+
1125
+ def solve(self):
1126
+ """
1127
+ Runs the DifferentialEvolutionSolver.
1128
+
1129
+ Returns
1130
+ -------
1131
+ res : OptimizeResult
1132
+ The optimization result represented as an `OptimizeResult` object.
1133
+ Important attributes are: ``x`` the solution array, ``success`` a
1134
+ Boolean flag indicating if the optimizer exited successfully,
1135
+ ``message`` which describes the cause of the termination,
1136
+ ``population`` the solution vectors present in the population, and
1137
+ ``population_energies`` the value of the objective function for
1138
+ each entry in ``population``.
1139
+ See `OptimizeResult` for a description of other attributes. If
1140
+ `polish` was employed, and a lower minimum was obtained by the
1141
+ polishing, then OptimizeResult also contains the ``jac`` attribute.
1142
+ If the eventual solution does not satisfy the applied constraints
1143
+ ``success`` will be `False`.
1144
+ """
1145
+ nit, warning_flag = 0, False
1146
+ status_message = _status_message['success']
1147
+
1148
+ # The population may have just been initialized (all entries are
1149
+ # np.inf). If it has you have to calculate the initial energies.
1150
+ # Although this is also done in the evolve generator it's possible
1151
+ # that someone can set maxiter=0, at which point we still want the
1152
+ # initial energies to be calculated (the following loop isn't run).
1153
+ if np.all(np.isinf(self.population_energies)):
1154
+ self.feasible, self.constraint_violation = (
1155
+ self._calculate_population_feasibilities(self.population))
1156
+
1157
+ # only work out population energies for feasible solutions
1158
+ self.population_energies[self.feasible] = (
1159
+ self._calculate_population_energies(
1160
+ self.population[self.feasible]))
1161
+
1162
+ self._promote_lowest_energy()
1163
+
1164
+ # do the optimization.
1165
+ for nit in range(1, self.maxiter + 1):
1166
+ # evolve the population by a generation
1167
+ try:
1168
+ next(self)
1169
+ except StopIteration:
1170
+ warning_flag = True
1171
+ if self._nfev > self.maxfun:
1172
+ status_message = _status_message['maxfev']
1173
+ elif self._nfev == self.maxfun:
1174
+ status_message = ('Maximum number of function evaluations'
1175
+ ' has been reached.')
1176
+ break
1177
+
1178
+ if self.disp:
1179
+ print(f"differential_evolution step {nit}: f(x)="
1180
+ f" {self.population_energies[0]}"
1181
+ )
1182
+
1183
+ if self.callback:
1184
+ c = self.tol / (self.convergence + _MACHEPS)
1185
+ res = self._result(nit=nit, message="in progress")
1186
+ res.convergence = c
1187
+ try:
1188
+ warning_flag = bool(self.callback(res))
1189
+ except StopIteration:
1190
+ warning_flag = True
1191
+
1192
+ if warning_flag:
1193
+ status_message = 'callback function requested stop early'
1194
+
1195
+ # should the solver terminate?
1196
+ if warning_flag or self.converged():
1197
+ break
1198
+
1199
+ else:
1200
+ status_message = _status_message['maxiter']
1201
+ warning_flag = True
1202
+
1203
+ DE_result = self._result(
1204
+ nit=nit, message=status_message, warning_flag=warning_flag
1205
+ )
1206
+
1207
+ if self.polish and not np.all(self.integrality):
1208
+ # can't polish if all the parameters are integers
1209
+ if np.any(self.integrality):
1210
+ # set the lower/upper bounds equal so that any integrality
1211
+ # constraints work.
1212
+ limits, integrality = self.limits, self.integrality
1213
+ limits[0, integrality] = DE_result.x[integrality]
1214
+ limits[1, integrality] = DE_result.x[integrality]
1215
+
1216
+ polish_method = 'L-BFGS-B'
1217
+
1218
+ if self._wrapped_constraints:
1219
+ polish_method = 'trust-constr'
1220
+
1221
+ constr_violation = self._constraint_violation_fn(DE_result.x)
1222
+ if np.any(constr_violation > 0.):
1223
+ warnings.warn("differential evolution didn't find a "
1224
+ "solution satisfying the constraints, "
1225
+ "attempting to polish from the least "
1226
+ "infeasible solution",
1227
+ UserWarning, stacklevel=2)
1228
+ if self.disp:
1229
+ print(f"Polishing solution with '{polish_method}'")
1230
+ result = minimize(self.func,
1231
+ np.copy(DE_result.x),
1232
+ method=polish_method,
1233
+ bounds=self.limits.T,
1234
+ constraints=self.constraints)
1235
+
1236
+ self._nfev += result.nfev
1237
+ DE_result.nfev = self._nfev
1238
+
1239
+ # The polished solution is only accepted if there is an improvement in
1240
+ # the cost function, the polishing was successful, and the solution lies
1241
+ # within the bounds.
1242
+ if (result.fun < DE_result.fun and
1243
+ result.success and
1244
+ np.all(result.x <= self.limits[1]) and
1245
+ np.all(self.limits[0] <= result.x)):
1246
+ DE_result.fun = result.fun
1247
+ DE_result.x = result.x
1248
+ DE_result.jac = result.jac
1249
+ # to keep internal state consistent
1250
+ self.population_energies[0] = result.fun
1251
+ self.population[0] = self._unscale_parameters(result.x)
1252
+
1253
+ if self._wrapped_constraints:
1254
+ DE_result.constr = [c.violation(DE_result.x) for
1255
+ c in self._wrapped_constraints]
1256
+ DE_result.constr_violation = np.max(
1257
+ np.concatenate(DE_result.constr))
1258
+ DE_result.maxcv = DE_result.constr_violation
1259
+ if DE_result.maxcv > 0:
1260
+ # if the result is infeasible then success must be False
1261
+ DE_result.success = False
1262
+ DE_result.message = ("The solution does not satisfy the "
1263
+ f"constraints, MAXCV = {DE_result.maxcv}")
1264
+
1265
+ return DE_result
1266
+
1267
+ def _result(self, **kwds):
1268
+ # form an intermediate OptimizeResult
1269
+ nit = kwds.get('nit', None)
1270
+ message = kwds.get('message', None)
1271
+ warning_flag = kwds.get('warning_flag', False)
1272
+ result = OptimizeResult(
1273
+ x=self.x,
1274
+ fun=self.population_energies[0],
1275
+ nfev=self._nfev,
1276
+ nit=nit,
1277
+ message=message,
1278
+ success=(warning_flag is not True),
1279
+ population=self._scale_parameters(self.population),
1280
+ population_energies=self.population_energies
1281
+ )
1282
+ if self._wrapped_constraints:
1283
+ result.constr = [c.violation(result.x)
1284
+ for c in self._wrapped_constraints]
1285
+ result.constr_violation = np.max(np.concatenate(result.constr))
1286
+ result.maxcv = result.constr_violation
1287
+ if result.maxcv > 0:
1288
+ result.success = False
1289
+
1290
+ return result
1291
+
1292
+ def _calculate_population_energies(self, population):
1293
+ """
1294
+ Calculate the energies of a population.
1295
+
1296
+ Parameters
1297
+ ----------
1298
+ population : ndarray
1299
+ An array of parameter vectors normalised to [0, 1] using lower
1300
+ and upper limits. Has shape ``(np.size(population, 0), N)``.
1301
+
1302
+ Returns
1303
+ -------
1304
+ energies : ndarray
1305
+ An array of energies corresponding to each population member. If
1306
+ maxfun will be exceeded during this call, then the number of
1307
+ function evaluations will be reduced and energies will be
1308
+ right-padded with np.inf. Has shape ``(np.size(population, 0),)``
1309
+ """
1310
+ num_members = np.size(population, 0)
1311
+ # S is the number of function evals left to stay under the
1312
+ # maxfun budget
1313
+ S = min(num_members, self.maxfun - self._nfev)
1314
+
1315
+ energies = np.full(num_members, np.inf)
1316
+
1317
+ parameters_pop = self._scale_parameters(population)
1318
+ try:
1319
+ calc_energies = list(
1320
+ self._mapwrapper(self.func, parameters_pop[0:S])
1321
+ )
1322
+ calc_energies = np.squeeze(calc_energies)
1323
+ except (TypeError, ValueError) as e:
1324
+ # wrong number of arguments for _mapwrapper
1325
+ # or wrong length returned from the mapper
1326
+ raise RuntimeError(
1327
+ "The map-like callable must be of the form f(func, iterable), "
1328
+ "returning a sequence of numbers the same length as 'iterable'"
1329
+ ) from e
1330
+
1331
+ if calc_energies.size != S:
1332
+ if self.vectorized:
1333
+ raise RuntimeError("The vectorized function must return an"
1334
+ " array of shape (S,) when given an array"
1335
+ " of shape (len(x), S)")
1336
+ raise RuntimeError("func(x, *args) must return a scalar value")
1337
+
1338
+ energies[0:S] = calc_energies
1339
+
1340
+ if self.vectorized:
1341
+ self._nfev += 1
1342
+ else:
1343
+ self._nfev += S
1344
+
1345
+ return energies
1346
+
1347
+ def _promote_lowest_energy(self):
1348
+ # swaps 'best solution' into first population entry
1349
+
1350
+ idx = np.arange(self.num_population_members)
1351
+ feasible_solutions = idx[self.feasible]
1352
+ if feasible_solutions.size:
1353
+ # find the best feasible solution
1354
+ idx_t = np.argmin(self.population_energies[feasible_solutions])
1355
+ l = feasible_solutions[idx_t]
1356
+ else:
1357
+ # no solution was feasible, use 'best' infeasible solution, which
1358
+ # will violate constraints the least
1359
+ l = np.argmin(np.sum(self.constraint_violation, axis=1))
1360
+
1361
+ self.population_energies[[0, l]] = self.population_energies[[l, 0]]
1362
+ self.population[[0, l], :] = self.population[[l, 0], :]
1363
+ self.feasible[[0, l]] = self.feasible[[l, 0]]
1364
+ self.constraint_violation[[0, l], :] = (
1365
+ self.constraint_violation[[l, 0], :])
1366
+
1367
+ def _constraint_violation_fn(self, x):
1368
+ """
1369
+ Calculates total constraint violation for all the constraints, for a
1370
+ set of solutions.
1371
+
1372
+ Parameters
1373
+ ----------
1374
+ x : ndarray
1375
+ Solution vector(s). Has shape (S, N), or (N,), where S is the
1376
+ number of solutions to investigate and N is the number of
1377
+ parameters.
1378
+
1379
+ Returns
1380
+ -------
1381
+ cv : ndarray
1382
+ Total violation of constraints. Has shape ``(S, M)``, where M is
1383
+ the total number of constraint components (which is not necessarily
1384
+ equal to len(self._wrapped_constraints)).
1385
+ """
1386
+ # how many solution vectors you're calculating constraint violations
1387
+ # for
1388
+ S = np.size(x) // self.parameter_count
1389
+ _out = np.zeros((S, self.total_constraints))
1390
+ offset = 0
1391
+ for con in self._wrapped_constraints:
1392
+ # the input/output of the (vectorized) constraint function is
1393
+ # {(N, S), (N,)} --> (M, S)
1394
+ # The input to _constraint_violation_fn is (S, N) or (N,), so
1395
+ # transpose to pass it to the constraint. The output is transposed
1396
+ # from (M, S) to (S, M) for further use.
1397
+ c = con.violation(x.T).T
1398
+
1399
+ # The shape of c should be (M,), (1, M), or (S, M). Check for
1400
+ # those shapes, as an incorrect shape indicates that the
1401
+ # user constraint function didn't return the right thing, and
1402
+ # the reshape operation will fail. Intercept the wrong shape
1403
+ # to give a reasonable error message. I'm not sure what failure
1404
+ # modes an inventive user will come up with.
1405
+ if c.shape[-1] != con.num_constr or (S > 1 and c.shape[0] != S):
1406
+ raise RuntimeError("An array returned from a Constraint has"
1407
+ " the wrong shape. If `vectorized is False`"
1408
+ " the Constraint should return an array of"
1409
+ " shape (M,). If `vectorized is True` then"
1410
+ " the Constraint must return an array of"
1411
+ " shape (M, S), where S is the number of"
1412
+ " solution vectors and M is the number of"
1413
+ " constraint components in a given"
1414
+ " Constraint object.")
1415
+
1416
+ # the violation function may return a 1D array, but is it a
1417
+ # sequence of constraints for one solution (S=1, M>=1), or the
1418
+ # value of a single constraint for a sequence of solutions
1419
+ # (S>=1, M=1)
1420
+ c = np.reshape(c, (S, con.num_constr))
1421
+ _out[:, offset:offset + con.num_constr] = c
1422
+ offset += con.num_constr
1423
+
1424
+ return _out
1425
+
1426
+ def _calculate_population_feasibilities(self, population):
1427
+ """
1428
+ Calculate the feasibilities of a population.
1429
+
1430
+ Parameters
1431
+ ----------
1432
+ population : ndarray
1433
+ An array of parameter vectors normalised to [0, 1] using lower
1434
+ and upper limits. Has shape ``(np.size(population, 0), N)``.
1435
+
1436
+ Returns
1437
+ -------
1438
+ feasible, constraint_violation : ndarray, ndarray
1439
+ Boolean array of feasibility for each population member, and an
1440
+ array of the constraint violation for each population member.
1441
+ constraint_violation has shape ``(np.size(population, 0), M)``,
1442
+ where M is the number of constraints.
1443
+ """
1444
+ num_members = np.size(population, 0)
1445
+ if not self._wrapped_constraints:
1446
+ # shortcut for no constraints
1447
+ return np.ones(num_members, bool), np.zeros((num_members, 1))
1448
+
1449
+ # (S, N)
1450
+ parameters_pop = self._scale_parameters(population)
1451
+
1452
+ if self.vectorized:
1453
+ # (S, M)
1454
+ constraint_violation = np.array(
1455
+ self._constraint_violation_fn(parameters_pop)
1456
+ )
1457
+ else:
1458
+ # (S, 1, M)
1459
+ constraint_violation = np.array([self._constraint_violation_fn(x)
1460
+ for x in parameters_pop])
1461
+ # if you use the list comprehension in the line above it will
1462
+ # create an array of shape (S, 1, M), because each iteration
1463
+ # generates an array of (1, M). In comparison the vectorized
1464
+ # version returns (S, M). It's therefore necessary to remove axis 1
1465
+ constraint_violation = constraint_violation[:, 0]
1466
+
1467
+ feasible = ~(np.sum(constraint_violation, axis=1) > 0)
1468
+
1469
+ return feasible, constraint_violation
1470
+
1471
+ def __iter__(self):
1472
+ return self
1473
+
1474
+ def __enter__(self):
1475
+ return self
1476
+
1477
+ def __exit__(self, *args):
1478
+ return self._mapwrapper.__exit__(*args)
1479
+
1480
+ def _accept_trial(self, energy_trial, feasible_trial, cv_trial,
1481
+ energy_orig, feasible_orig, cv_orig):
1482
+ """
1483
+ Trial is accepted if:
1484
+ * it satisfies all constraints and provides a lower or equal objective
1485
+ function value, while both the compared solutions are feasible
1486
+ - or -
1487
+ * it is feasible while the original solution is infeasible,
1488
+ - or -
1489
+ * it is infeasible, but provides a lower or equal constraint violation
1490
+ for all constraint functions.
1491
+
1492
+ This test corresponds to section III of Lampinen [1]_.
1493
+
1494
+ Parameters
1495
+ ----------
1496
+ energy_trial : float
1497
+ Energy of the trial solution
1498
+ feasible_trial : bool
1499
+ Feasibility of trial solution
1500
+ cv_trial : array-like
1501
+ Excess constraint violation for the trial solution
1502
+ energy_orig : float
1503
+ Energy of the original solution
1504
+ feasible_orig : bool
1505
+ Feasibility of original solution
1506
+ cv_orig : array-like
1507
+ Excess constraint violation for the original solution
1508
+
1509
+ Returns
1510
+ -------
1511
+ accepted : bool
1512
+
1513
+ """
1514
+ if feasible_orig and feasible_trial:
1515
+ return energy_trial <= energy_orig
1516
+ elif feasible_trial and not feasible_orig:
1517
+ return True
1518
+ elif not feasible_trial and (cv_trial <= cv_orig).all():
1519
+ # cv_trial < cv_orig would imply that both trial and orig are not
1520
+ # feasible
1521
+ return True
1522
+
1523
+ return False
1524
+
1525
+ def __next__(self):
1526
+ """
1527
+ Evolve the population by a single generation
1528
+
1529
+ Returns
1530
+ -------
1531
+ x : ndarray
1532
+ The best solution from the solver.
1533
+ fun : float
1534
+ Value of objective function obtained from the best solution.
1535
+ """
1536
+ # the population may have just been initialized (all entries are
1537
+ # np.inf). If it has you have to calculate the initial energies
1538
+ if np.all(np.isinf(self.population_energies)):
1539
+ self.feasible, self.constraint_violation = (
1540
+ self._calculate_population_feasibilities(self.population))
1541
+
1542
+ # only need to work out population energies for those that are
1543
+ # feasible
1544
+ self.population_energies[self.feasible] = (
1545
+ self._calculate_population_energies(
1546
+ self.population[self.feasible]))
1547
+
1548
+ self._promote_lowest_energy()
1549
+
1550
+ if self.dither is not None:
1551
+ self.scale = self.random_number_generator.uniform(self.dither[0],
1552
+ self.dither[1])
1553
+
1554
+ if self._updating == 'immediate':
1555
+ # update best solution immediately
1556
+ for candidate in range(self.num_population_members):
1557
+ if self._nfev > self.maxfun:
1558
+ raise StopIteration
1559
+
1560
+ # create a trial solution
1561
+ trial = self._mutate(candidate)
1562
+
1563
+ # ensuring that it's in the range [0, 1)
1564
+ self._ensure_constraint(trial)
1565
+
1566
+ # scale from [0, 1) to the actual parameter value
1567
+ parameters = self._scale_parameters(trial)
1568
+
1569
+ # determine the energy of the objective function
1570
+ if self._wrapped_constraints:
1571
+ cv = self._constraint_violation_fn(parameters)
1572
+ feasible = False
1573
+ energy = np.inf
1574
+ if not np.sum(cv) > 0:
1575
+ # solution is feasible
1576
+ feasible = True
1577
+ energy = self.func(parameters)
1578
+ self._nfev += 1
1579
+ else:
1580
+ feasible = True
1581
+ cv = np.atleast_2d([0.])
1582
+ energy = self.func(parameters)
1583
+ self._nfev += 1
1584
+
1585
+ # compare trial and population member
1586
+ if self._accept_trial(energy, feasible, cv,
1587
+ self.population_energies[candidate],
1588
+ self.feasible[candidate],
1589
+ self.constraint_violation[candidate]):
1590
+ self.population[candidate] = trial
1591
+ self.population_energies[candidate] = np.squeeze(energy)
1592
+ self.feasible[candidate] = feasible
1593
+ self.constraint_violation[candidate] = cv
1594
+
1595
+ # if the trial candidate is also better than the best
1596
+ # solution then promote it.
1597
+ if self._accept_trial(energy, feasible, cv,
1598
+ self.population_energies[0],
1599
+ self.feasible[0],
1600
+ self.constraint_violation[0]):
1601
+ self._promote_lowest_energy()
1602
+
1603
+ elif self._updating == 'deferred':
1604
+ # update best solution once per generation
1605
+ if self._nfev >= self.maxfun:
1606
+ raise StopIteration
1607
+
1608
+ # 'deferred' approach, vectorised form.
1609
+ # create trial solutions
1610
+ trial_pop = self._mutate_many(
1611
+ np.arange(self.num_population_members)
1612
+ )
1613
+
1614
+ # enforce bounds
1615
+ self._ensure_constraint(trial_pop)
1616
+
1617
+ # determine the energies of the objective function, but only for
1618
+ # feasible trials
1619
+ feasible, cv = self._calculate_population_feasibilities(trial_pop)
1620
+ trial_energies = np.full(self.num_population_members, np.inf)
1621
+
1622
+ # only calculate for feasible entries
1623
+ trial_energies[feasible] = self._calculate_population_energies(
1624
+ trial_pop[feasible])
1625
+
1626
+ # which solutions are 'improved'?
1627
+ loc = [self._accept_trial(*val) for val in
1628
+ zip(trial_energies, feasible, cv, self.population_energies,
1629
+ self.feasible, self.constraint_violation)]
1630
+ loc = np.array(loc)
1631
+ self.population = np.where(loc[:, np.newaxis],
1632
+ trial_pop,
1633
+ self.population)
1634
+ self.population_energies = np.where(loc,
1635
+ trial_energies,
1636
+ self.population_energies)
1637
+ self.feasible = np.where(loc,
1638
+ feasible,
1639
+ self.feasible)
1640
+ self.constraint_violation = np.where(loc[:, np.newaxis],
1641
+ cv,
1642
+ self.constraint_violation)
1643
+
1644
+ # make sure the best solution is updated if updating='deferred'.
1645
+ # put the lowest energy into the best solution position.
1646
+ self._promote_lowest_energy()
1647
+
1648
+ return self.x, self.population_energies[0]
1649
+
1650
+ def _scale_parameters(self, trial):
1651
+ """Scale from a number between 0 and 1 to parameters."""
1652
+ # trial either has shape (N, ) or (L, N), where L is the number of
1653
+ # solutions being scaled
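+ # e.g. (illustrative) for a parameter with limits (-1, 3):
+ # 0.0 -> -1.0, 0.5 -> 1.0, 1.0 -> 3.0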
1654
+ scaled = self.__scale_arg1 + (trial - 0.5) * self.__scale_arg2
1655
+ if np.count_nonzero(self.integrality):
1656
+ i = np.broadcast_to(self.integrality, scaled.shape)
1657
+ scaled[i] = np.round(scaled[i])
1658
+ return scaled
1659
+
1660
+ def _unscale_parameters(self, parameters):
1661
+ """Scale from parameters to a number between 0 and 1."""
1662
+ return (parameters - self.__scale_arg1) * self.__recip_scale_arg2 + 0.5
1663
+
1664
+ def _ensure_constraint(self, trial):
1665
+ """Make sure the parameters lie between the limits."""
1666
+ mask = np.bitwise_or(trial > 1, trial < 0)
1667
+ if oob := np.count_nonzero(mask):
1668
+ trial[mask] = self.random_number_generator.uniform(size=oob)
1669
+
1670
+ def _mutate_custom(self, candidate):
1671
+ rng = self.random_number_generator
1672
+ msg = (
1673
+ "strategy must have signature"
1674
+ " f(candidate: int, population: np.ndarray, rng=None) returning an"
1675
+ " array of shape (N,)"
1676
+ )
1677
+ _population = self._scale_parameters(self.population)
1678
+ if not len(np.shape(candidate)):
1679
+ # single entry in population
1680
+ trial = self.strategy(candidate, _population, rng=rng)
1681
+ if trial.shape != (self.parameter_count,):
1682
+ raise RuntimeError(msg)
1683
+ else:
1684
+ S = candidate.shape[0]
1685
+ trial = np.array(
1686
+ [self.strategy(c, _population, rng=rng) for c in candidate],
1687
+ dtype=float
1688
+ )
1689
+ if trial.shape != (S, self.parameter_count):
1690
+ raise RuntimeError(msg)
1691
+ return self._unscale_parameters(trial)
1692
+
1693
+ def _mutate_many(self, candidates):
1694
+ """Create trial vectors based on a mutation strategy."""
1695
+ rng = self.random_number_generator
1696
+
1697
+ S = len(candidates)
1698
+ if callable(self.strategy):
1699
+ return self._mutate_custom(candidates)
1700
+
1701
+ trial = np.copy(self.population[candidates])
1702
+ samples = np.array([self._select_samples(c, 5) for c in candidates])
1703
+
1704
+ if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
1705
+ bprime = self.mutation_func(candidates, samples)
1706
+ else:
1707
+ bprime = self.mutation_func(samples)
1708
+
1709
+ fill_point = rng_integers(rng, self.parameter_count, size=S)
1710
+ crossovers = rng.uniform(size=(S, self.parameter_count))
1711
+ crossovers = crossovers < self.cross_over_probability
1712
+ if self.strategy in self._binomial:
1713
+ # the last one is always from the bprime vector for binomial
1714
+ # If you fill in modulo with a loop you have to set the last one to
1715
+ # true. If you don't use a loop then you can have any random entry
1716
+ # be True.
1717
+ i = np.arange(S)
1718
+ crossovers[i, fill_point[i]] = True
1719
+ trial = np.where(crossovers, bprime, trial)
1720
+ return trial
1721
+
1722
+ elif self.strategy in self._exponential:
1723
+ crossovers[..., 0] = True
1724
+ for j in range(S):
1725
+ i = 0
1726
+ init_fill = fill_point[j]
1727
+ while (i < self.parameter_count and crossovers[j, i]):
1728
+ trial[j, init_fill] = bprime[j, init_fill]
1729
+ init_fill = (init_fill + 1) % self.parameter_count
1730
+ i += 1
1731
+
1732
+ return trial
1733
+
1734
+ def _mutate(self, candidate):
1735
+ """Create a trial vector based on a mutation strategy."""
1736
+ rng = self.random_number_generator
1737
+
1738
+ if callable(self.strategy):
1739
+ return self._mutate_custom(candidate)
1740
+
1741
+ fill_point = rng_integers(rng, self.parameter_count)
1742
+ samples = self._select_samples(candidate, 5)
1743
+
1744
+ trial = np.copy(self.population[candidate])
1745
+
1746
+ if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
1747
+ bprime = self.mutation_func(candidate, samples)
1748
+ else:
1749
+ bprime = self.mutation_func(samples)
1750
+
1751
+ crossovers = rng.uniform(size=self.parameter_count)
1752
+ crossovers = crossovers < self.cross_over_probability
1753
+ if self.strategy in self._binomial:
1754
+ # the last one is always from the bprime vector for binomial
1755
+ # If you fill in modulo with a loop you have to set the last one to
1756
+ # true. If you don't use a loop then you can have any random entry
1757
+ # be True.
1758
+ crossovers[fill_point] = True
1759
+ trial = np.where(crossovers, bprime, trial)
1760
+ return trial
1761
+
1762
+ elif self.strategy in self._exponential:
1763
+ i = 0
1764
+ crossovers[0] = True
1765
+ while i < self.parameter_count and crossovers[i]:
1766
+ trial[fill_point] = bprime[fill_point]
1767
+ fill_point = (fill_point + 1) % self.parameter_count
1768
+ i += 1
1769
+
1770
+ return trial
1771
+
1772
+ def _best1(self, samples):
1773
+ """best1bin, best1exp"""
1774
+ # samples.shape == (S, 5)
1775
+ # or
1776
+ # samples.shape == (5,)
1777
+ r0, r1 = samples[..., :2].T
1778
+ return (self.population[0] + self.scale *
1779
+ (self.population[r0] - self.population[r1]))
1780
+
1781
+ def _rand1(self, samples):
1782
+ """rand1bin, rand1exp"""
1783
+ r0, r1, r2 = samples[..., :3].T
1784
+ return (self.population[r0] + self.scale *
1785
+ (self.population[r1] - self.population[r2]))
1786
+
1787
+ def _randtobest1(self, samples):
1788
+ """randtobest1bin, randtobest1exp"""
1789
+ r0, r1, r2 = samples[..., :3].T
1790
+ bprime = np.copy(self.population[r0])
1791
+ bprime += self.scale * (self.population[0] - bprime)
1792
+ bprime += self.scale * (self.population[r1] -
1793
+ self.population[r2])
1794
+ return bprime
1795
+
1796
+ def _currenttobest1(self, candidate, samples):
1797
+ """currenttobest1bin, currenttobest1exp"""
1798
+ r0, r1 = samples[..., :2].T
1799
+ bprime = (self.population[candidate] + self.scale *
1800
+ (self.population[0] - self.population[candidate] +
1801
+ self.population[r0] - self.population[r1]))
1802
+ return bprime
1803
+
1804
+ def _best2(self, samples):
1805
+ """best2bin, best2exp"""
1806
+ r0, r1, r2, r3 = samples[..., :4].T
1807
+ bprime = (self.population[0] + self.scale *
1808
+ (self.population[r0] + self.population[r1] -
1809
+ self.population[r2] - self.population[r3]))
1810
+
1811
+ return bprime
1812
+
1813
+ def _rand2(self, samples):
1814
+ """rand2bin, rand2exp"""
1815
+ r0, r1, r2, r3, r4 = samples[..., :5].T
1816
+ bprime = (self.population[r0] + self.scale *
1817
+ (self.population[r1] + self.population[r2] -
1818
+ self.population[r3] - self.population[r4]))
1819
+
1820
+ return bprime
1821
+
1822
+ def _select_samples(self, candidate, number_samples):
1823
+ """
1824
+ obtain random integers from range(self.num_population_members),
1825
+ without replacement. You can't have the original candidate either.
1826
+ """
1827
+ self.random_number_generator.shuffle(self._random_population_index)
1828
+ idxs = self._random_population_index[:number_samples + 1]
1829
+ return idxs[idxs != candidate][:number_samples]
1830
+
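+ # A minimal standalone sketch (illustration only) of the sampling scheme in
+ # `_select_samples` above: draw distinct population indices by shuffling a
+ # permutation and dropping the current candidate.
+ # import numpy as np
+ # rng = np.random.default_rng(42)
+ # num_members, candidate, k = 10, 3, 5
+ # idx = np.arange(num_members)
+ # rng.shuffle(idx)                    # random permutation of all indices
+ # samples = idx[idx != candidate][:k] # remove the candidate, keep first k
+ # print(samples)                      # k distinct indices, none equal to 3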
1831
+
1832
+ class _ConstraintWrapper:
1833
+ """Object to wrap/evaluate user defined constraints.
1834
+
1835
+ Very similar in practice to `PreparedConstraint`, except that no evaluation
1836
+ of jac/hess is performed (explicit or implicit).
1837
+
1838
+ If created successfully, it will contain the attributes listed below.
1839
+
1840
+ Parameters
1841
+ ----------
1842
+ constraint : {`NonlinearConstraint`, `LinearConstraint`, `Bounds`}
1843
+ Constraint to check and prepare.
1844
+ x0 : array_like
1845
+ Initial vector of independent variables, shape (N,)
1846
+
1847
+ Attributes
1848
+ ----------
1849
+ fun : callable
1850
+ Function defining the constraint wrapped by one of the convenience
1851
+ classes.
1852
+ bounds : 2-tuple
1853
+ Contains lower and upper bounds for the constraints --- lb and ub.
1854
+ These are converted to ndarray and have a size equal to the number of
1855
+ the constraints.
1856
+
1857
+ Notes
1858
+ -----
1859
+ _ConstraintWrapper.fun and _ConstraintWrapper.violation can get sent
1860
+ arrays of shape (N, S) or (N,), where S is the number of vectors of shape
1861
+ (N,) to consider constraints for.
1862
+ """
1863
+ def __init__(self, constraint, x0):
1864
+ self.constraint = constraint
1865
+
1866
+ if isinstance(constraint, NonlinearConstraint):
1867
+ def fun(x):
1868
+ x = np.asarray(x)
1869
+ return np.atleast_1d(constraint.fun(x))
1870
+ elif isinstance(constraint, LinearConstraint):
1871
+ def fun(x):
1872
+ if issparse(constraint.A):
1873
+ A = constraint.A
1874
+ else:
1875
+ A = np.atleast_2d(constraint.A)
1876
+
1877
+ res = A.dot(x)
1878
+ # x either has shape (N, S) or (N)
1879
+ # (M, N) x (N, S) --> (M, S)
1880
+ # (M, N) x (N,) --> (M,)
1881
+ # However, if (M, N) is a matrix then:
1882
+ # (M, N) * (N,) --> (M, 1), we need this to be (M,)
1883
+ if x.ndim == 1 and res.ndim == 2:
1884
+ # deal with case that constraint.A is an np.matrix
1885
+ # see gh20041
1886
+ res = np.asarray(res)[:, 0]
1887
+
1888
+ return res
1889
+ elif isinstance(constraint, Bounds):
1890
+ def fun(x):
1891
+ return np.asarray(x)
1892
+ else:
1893
+ raise ValueError("`constraint` of an unknown type is passed.")
1894
+
1895
+ self.fun = fun
1896
+
1897
+ lb = np.asarray(constraint.lb, dtype=float)
1898
+ ub = np.asarray(constraint.ub, dtype=float)
1899
+
1900
+ x0 = np.asarray(x0)
1901
+
1902
+ # find out the number of constraints
1903
+ f0 = fun(x0)
1904
+ self.num_constr = m = f0.size
1905
+ self.parameter_count = x0.size
1906
+
1907
+ if lb.ndim == 0:
1908
+ lb = np.resize(lb, m)
1909
+ if ub.ndim == 0:
1910
+ ub = np.resize(ub, m)
1911
+
1912
+ self.bounds = (lb, ub)
1913
+
1914
+ def __call__(self, x):
1915
+ return np.atleast_1d(self.fun(x))
1916
+
1917
+ def violation(self, x):
1918
+ """How much the constraint is exceeded by.
1919
+
1920
+ Parameters
1921
+ ----------
1922
+ x : array-like
1923
+ Vector of independent variables, (N, S), where N is number of
1924
+ parameters and S is the number of solutions to be investigated.
1925
+
1926
+ Returns
1927
+ -------
1928
+ excess : array-like
1929
+ How much the constraint is exceeded by, for each of the
1930
+ constraints specified by `_ConstraintWrapper.fun`.
1931
+ Has shape (M, S) where M is the number of constraint components.
1932
+ """
1933
+ # expect ev to have shape (num_constr, S) or (num_constr,)
1934
+ ev = self.fun(np.asarray(x))
1935
+
1936
+ try:
1937
+ excess_lb = np.maximum(self.bounds[0] - ev.T, 0)
1938
+ excess_ub = np.maximum(ev.T - self.bounds[1], 0)
1939
+ except ValueError as e:
1940
+ raise RuntimeError("An array returned from a Constraint has"
1941
+ " the wrong shape. If `vectorized is False`"
1942
+ " the Constraint should return an array of"
1943
+ " shape (M,). If `vectorized is True` then"
1944
+ " the Constraint must return an array of"
1945
+ " shape (M, S), where S is the number of"
1946
+ " solution vectors and M is the number of"
1947
+ " constraint components in a given"
1948
+ " Constraint object.") from e
1949
+
1950
+ v = (excess_lb + excess_ub).T
1951
+ return v
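+ # A minimal usage sketch (illustration only; `_ConstraintWrapper` is private,
+ # and the module path scipy.optimize._differentialevolution is an assumption):
+ # import numpy as np
+ # from scipy.optimize import NonlinearConstraint
+ # from scipy.optimize._differentialevolution import _ConstraintWrapper
+ # nlc = NonlinearConstraint(lambda x: x[0]**2 + x[1], -np.inf, 1.0)
+ # pc = _ConstraintWrapper(nlc, x0=[0.5, 0.5])
+ # print(pc.violation([0.5, 0.5]))   # [0.] -> feasible
+ # print(pc.violation([2.0, 0.0]))   # [3.] -> exceeds ub by 3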
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py ADDED
@@ -0,0 +1,856 @@
1
+ # mypy: disable-error-code="attr-defined"
2
+ import numpy as np
3
+ import scipy._lib._elementwise_iterative_method as eim
4
+ from scipy._lib._util import _RichResult
5
+
6
+ _EERRORINCREASE = -1 # used in _differentiate
7
+
8
+ def _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
9
+ step_factor, step_direction, preserve_shape, callback):
10
+ # Input validation for `_differentiate`
11
+
12
+ if not callable(func):
13
+ raise ValueError('`func` must be callable.')
14
+
15
+ # x has more complex IV that is taken care of during initialization
16
+ x = np.asarray(x)
17
+ dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
18
+
19
+ if not np.iterable(args):
20
+ args = (args,)
21
+
22
+ if atol is None:
23
+ atol = np.finfo(dtype).tiny
24
+
25
+ if rtol is None:
26
+ rtol = np.sqrt(np.finfo(dtype).eps)
27
+
28
+ message = 'Tolerances and step parameters must be non-negative scalars.'
29
+ tols = np.asarray([atol, rtol, initial_step, step_factor])
30
+ if (not np.issubdtype(tols.dtype, np.number)
31
+ or np.any(tols < 0)
32
+ or tols.shape != (4,)):
33
+ raise ValueError(message)
34
+ initial_step, step_factor = tols[2:].astype(dtype)
35
+
36
+ maxiter_int = int(maxiter)
37
+ if maxiter != maxiter_int or maxiter <= 0:
38
+ raise ValueError('`maxiter` must be a positive integer.')
39
+
40
+ order_int = int(order)
41
+ if order_int != order or order <= 0:
42
+ raise ValueError('`order` must be a positive integer.')
43
+
44
+ step_direction = np.sign(step_direction).astype(dtype)
45
+ x, step_direction = np.broadcast_arrays(x, step_direction)
46
+ x, step_direction = x[()], step_direction[()]
47
+
48
+ message = '`preserve_shape` must be True or False.'
49
+ if preserve_shape not in {True, False}:
50
+ raise ValueError(message)
51
+
52
+ if callback is not None and not callable(callback):
53
+ raise ValueError('`callback` must be callable.')
54
+
55
+ return (func, x, args, atol, rtol, maxiter_int, order_int, initial_step,
56
+ step_factor, step_direction, preserve_shape, callback)
57
+
58
+
59
+ def _differentiate(func, x, *, args=(), atol=None, rtol=None, maxiter=10,
60
+ order=8, initial_step=0.5, step_factor=2.0,
61
+ step_direction=0, preserve_shape=False, callback=None):
62
+ """Evaluate the derivative of an elementwise scalar function numerically.
63
+
64
+ Parameters
65
+ ----------
66
+ func : callable
67
+ The function whose derivative is desired. The signature must be::
68
+
69
+ func(x: ndarray, *fargs) -> ndarray
70
+
71
+ where each element of ``x`` is a finite real number and ``fargs`` is a tuple,
72
+ which may contain an arbitrary number of arrays that are broadcastable
73
+ with `x`. ``func`` must be an elementwise function: each element
74
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
75
+ x : array_like
76
+ Abscissae at which to evaluate the derivative.
77
+ args : tuple, optional
78
+ Additional positional arguments to be passed to `func`. Must be arrays
79
+ broadcastable with `x`. If the callable to be differentiated requires
80
+ arguments that are not broadcastable with `x`, wrap that callable with
81
+ `func`. See Examples.
82
+ atol, rtol : float, optional
83
+ Absolute and relative tolerances for the stopping condition: iteration
84
+ will stop when ``res.error < atol + rtol * abs(res.df)``. The default
85
+ `atol` is the smallest normal number of the appropriate dtype, and
86
+ the default `rtol` is the square root of the precision of the
87
+ appropriate dtype.
88
+ order : int, default: 8
89
+ The (positive integer) order of the finite difference formula to be
90
+ used. Odd integers will be rounded up to the next even integer.
91
+ initial_step : float, default: 0.5
92
+ The (absolute) initial step size for the finite difference derivative
93
+ approximation.
94
+ step_factor : float, default: 2.0
95
+ The factor by which the step size is *reduced* in each iteration; i.e.
96
+ the step size in iteration 1 is ``initial_step/step_factor``. If
97
+ ``step_factor < 1``, subsequent steps will be greater than the initial
98
+ step; this may be useful if steps smaller than some threshold are
99
+ undesirable (e.g. due to subtractive cancellation error).
100
+ maxiter : int, default: 10
101
+ The maximum number of iterations of the algorithm to perform. See
102
+ notes.
103
+ step_direction : array_like
104
+ An array representing the direction of the finite difference steps (for
105
+ use when `x` lies near to the boundary of the domain of the function.)
106
+ Must be broadcastable with `x` and all `args`.
107
+ Where 0 (default), central differences are used; where negative (e.g.
108
+ -1), steps are non-positive; and where positive (e.g. 1), all steps are
109
+ non-negative.
110
+ preserve_shape : bool, default: False
111
+ In the following, "arguments of `func`" refers to the array ``x`` and
112
+ any arrays within ``fargs``. Let ``shape`` be the broadcasted shape
113
+ of `x` and all elements of `args` (which is conceptually
114
+ distinct from ``fargs`` passed into `f`).
115
+
116
+ - When ``preserve_shape=False`` (default), `f` must accept arguments
117
+ of *any* broadcastable shapes.
118
+
119
+ - When ``preserve_shape=True``, `f` must accept arguments of shape
120
+ ``shape`` *or* ``shape + (n,)``, where ``(n,)`` is the number of
121
+ abscissae at which the function is being evaluated.
122
+
123
+ In either case, for each scalar element ``xi`` within `x`, the array
124
+ returned by `f` must include the scalar ``f(xi)`` at the same index.
125
+ Consequently, the shape of the output is always the shape of the input
126
+ ``x``.
127
+
128
+ See Examples.
129
+ callback : callable, optional
130
+ An optional user-supplied function to be called before the first
131
+ iteration and after each iteration.
132
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
133
+ similar to that returned by `_differentiate` (but containing the
134
+ current iterate's values of all variables). If `callback` raises a
135
+ ``StopIteration``, the algorithm will terminate immediately and
136
+ `_differentiate` will return a result.
137
+
138
+ Returns
139
+ -------
140
+ res : _RichResult
141
+ An instance of `scipy._lib._util._RichResult` with the following
142
+ attributes. (The descriptions are written as though the values will be
143
+ scalars; however, if `func` returns an array, the outputs will be
144
+ arrays of the same shape.)
145
+
146
+ success : bool
147
+ ``True`` when the algorithm terminated successfully (status ``0``).
148
+ status : int
149
+ An integer representing the exit status of the algorithm.
150
+ ``0`` : The algorithm converged to the specified tolerances.
151
+ ``-1`` : The error estimate increased, so iteration was terminated.
152
+ ``-2`` : The maximum number of iterations was reached.
153
+ ``-3`` : A non-finite value was encountered.
154
+ ``-4`` : Iteration was terminated by `callback`.
155
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
156
+ df : float
157
+ The derivative of `func` at `x`, if the algorithm terminated
158
+ successfully.
159
+ error : float
160
+ An estimate of the error: the magnitude of the difference between
161
+ the current estimate of the derivative and the estimate in the
162
+ previous iteration.
163
+ nit : int
164
+ The number of iterations performed.
165
+ nfev : int
166
+ The number of points at which `func` was evaluated.
167
+ x : float
168
+ The value at which the derivative of `func` was evaluated
169
+ (after broadcasting with `args` and `step_direction`).
170
+
171
+ Notes
172
+ -----
173
+ The implementation was inspired by jacobi [1]_, numdifftools [2]_, and
174
+ DERIVEST [3]_, but the implementation follows the theory of Taylor series
175
+ more straightforwardly (and arguably naively so).
176
+ In the first iteration, the derivative is estimated using a finite
177
+ difference formula of order `order` with maximum step size `initial_step`.
178
+ Each subsequent iteration, the maximum step size is reduced by
179
+ `step_factor`, and the derivative is estimated again until a termination
180
+ condition is reached. The error estimate is the magnitude of the difference
181
+ between the current derivative approximation and that of the previous
182
+ iteration.
183
+
184
+ The stencils of the finite difference formulae are designed such that
185
+ abscissae are "nested": after `func` is evaluated at ``order + 1``
186
+ points in the first iteration, `func` is evaluated at only two new points
187
+ in each subsequent iteration; ``order - 1`` previously evaluated function
188
+ values required by the finite difference formula are reused, and two
189
+ function values (evaluations at the points furthest from `x`) are unused.
190
+
191
+ Step sizes are absolute. When the step size is small relative to the
192
+ magnitude of `x`, precision is lost; for example, if `x` is ``1e20``, the
193
+ default initial step size of ``0.5`` cannot be resolved. Accordingly,
194
+ consider using larger initial step sizes for large magnitudes of `x`.
195
+
196
+ The default tolerances are challenging to satisfy at points where the
197
+ true derivative is exactly zero. If the derivative may be exactly zero,
198
+ consider specifying an absolute tolerance (e.g. ``atol=1e-16``) to
199
+ improve convergence.
200
+
201
+ References
202
+ ----------
203
+ .. [1] Hans Dembinski (@HDembinski). jacobi.
+ https://github.com/HDembinski/jacobi
+ .. [2] Per A. Brodtkorb and John D'Errico. numdifftools.
+ https://numdifftools.readthedocs.io/en/latest/
+ .. [3] John D'Errico. DERIVEST: Adaptive Robust Numerical Differentiation.
+ https://www.mathworks.com/matlabcentral/fileexchange/13490-adaptive-robust-numerical-differentiation
+ .. [4] Numerical Differentiation. Wikipedia.
+ https://en.wikipedia.org/wiki/Numerical_differentiation
211
+
212
+ Examples
213
+ --------
214
+ Evaluate the derivative of ``np.exp`` at several points ``x``.
215
+
216
+ >>> import numpy as np
217
+ >>> from scipy.optimize._differentiate import _differentiate
218
+ >>> f = np.exp
219
+ >>> df = np.exp # true derivative
220
+ >>> x = np.linspace(1, 2, 5)
221
+ >>> res = _differentiate(f, x)
222
+ >>> res.df # approximation of the derivative
223
+ array([2.71828183, 3.49034296, 4.48168907, 5.75460268, 7.3890561 ])
224
+ >>> res.error # estimate of the error
225
+ array(
226
+ [7.12940817e-12, 9.16688947e-12, 1.17594823e-11, 1.50972568e-11, 1.93942640e-11]
227
+ )
228
+ >>> abs(res.df - df(x)) # true error
229
+ array(
230
+ [3.06421555e-14, 3.01980663e-14, 5.06261699e-14, 6.30606678e-14, 8.34887715e-14]
231
+ )
232
+
233
+ Show the convergence of the approximation as the step size is reduced.
234
+ Each iteration, the step size is reduced by `step_factor`, so for
235
+ sufficiently small initial step, each iteration reduces the error by a
236
+ factor of ``1/step_factor**order`` until finite precision arithmetic
237
+ inhibits further improvement.
238
+
239
+ >>> iter = list(range(1, 12)) # maximum iterations
240
+ >>> hfac = 2 # step size reduction per iteration
241
+ >>> hdir = [-1, 0, 1] # compare left-, central-, and right- steps
242
+ >>> order = 4 # order of differentiation formula
243
+ >>> x = 1
244
+ >>> ref = df(x)
245
+ >>> errors = [] # true error
246
+ >>> for i in iter:
247
+ ... res = _differentiate(f, x, maxiter=i, step_factor=hfac,
248
+ ... step_direction=hdir, order=order,
249
+ ... atol=0, rtol=0) # prevent early termination
250
+ ... errors.append(abs(res.df - ref))
251
+ >>> errors = np.array(errors)
252
+ >>> plt.semilogy(iter, errors[:, 0], label='left differences')
253
+ >>> plt.semilogy(iter, errors[:, 1], label='central differences')
254
+ >>> plt.semilogy(iter, errors[:, 2], label='right differences')
255
+ >>> plt.xlabel('iteration')
256
+ >>> plt.ylabel('error')
257
+ >>> plt.legend()
258
+ >>> plt.show()
259
+ >>> (errors[1, 1] / errors[0, 1], 1 / hfac**order)
260
+ (0.06215223140159822, 0.0625)
261
+
262
+ The implementation is vectorized over `x`, `step_direction`, and `args`.
263
+ The function is evaluated once before the first iteration to perform input
264
+ validation and standardization, and once per iteration thereafter.
265
+
266
+ >>> def f(x, p):
267
+ ... print('here')
268
+ ... f.nit += 1
269
+ ... return x**p
270
+ >>> f.nit = 0
271
+ >>> def df(x, p):
272
+ ... return p*x**(p-1)
273
+ >>> x = np.arange(1, 5)
274
+ >>> p = np.arange(1, 6).reshape((-1, 1))
275
+ >>> hdir = np.arange(-1, 2).reshape((-1, 1, 1))
276
+ >>> res = _differentiate(f, x, args=(p,), step_direction=hdir, maxiter=1)
277
+ >>> np.allclose(res.df, df(x, p))
278
+ True
279
+ >>> res.df.shape
280
+ (3, 5, 4)
281
+ >>> f.nit
282
+ 2
283
+
284
+ By default, `preserve_shape` is False, and therefore the callable
285
+ `f` may be called with arrays of any broadcastable shapes.
286
+ For example:
287
+
288
+ >>> shapes = []
289
+ >>> def f(x, c):
290
+ ... shape = np.broadcast_shapes(x.shape, c.shape)
291
+ ... shapes.append(shape)
292
+ ... return np.sin(c*x)
293
+ >>>
294
+ >>> c = [1, 5, 10, 20]
295
+ >>> res = _differentiate(f, 0, args=(c,))
296
+ >>> shapes
297
+ [(4,), (4, 8), (4, 2), (3, 2), (2, 2), (1, 2)]
298
+
299
+ To understand where these shapes are coming from - and to better
300
+ understand how `_differentiate` computes accurate results - note that
301
+ higher values of ``c`` correspond with higher frequency sinusoids.
302
+ The higher frequency sinusoids make the function's derivative change
303
+ faster, so more function evaluations are required to achieve the target
304
+ accuracy:
305
+
306
+ >>> res.nfev
307
+ array([11, 13, 15, 17])
308
+
309
+ The initial ``shape``, ``(4,)``, corresponds with evaluating the
310
+ function at a single abscissa and all four frequencies; this is used
311
+ for input validation and to determine the size and dtype of the arrays
312
+ that store results. The next shape corresponds with evaluating the
313
+ function at an initial grid of abscissae and all four frequencies.
314
+ Successive calls to the function evaluate the function at two more
315
+ abscissae, increasing the effective order of the approximation by two.
316
+ However, in later function evaluations, the function is evaluated at
317
+ fewer frequencies because the corresponding derivative has already
318
+ converged to the required tolerance. This saves function evaluations to
319
+ improve performance, but it requires the function to accept arguments of
320
+ any shape.
321
+
322
+ "Vector-valued" functions are unlikely to satisfy this requirement.
323
+ For example, consider
324
+
325
+ >>> def f(x):
326
+ ... return [x, np.sin(3*x), x+np.sin(10*x), np.sin(20*x)*(x-1)**2]
327
+
328
+ This function is not compatible with `_differentiate` as written; for instance,
329
+ the shape of the output will not be the same as the shape of ``x``. Such a
330
+ function *could* be converted to a compatible form with the introduction of
331
+ additional parameters, but this would be inconvenient. In such cases,
332
+ a simpler solution would be to use `preserve_shape`.
333
+
334
+ >>> shapes = []
335
+ >>> def f(x):
336
+ ... shapes.append(x.shape)
337
+ ... x0, x1, x2, x3 = x
338
+ ... return [x0, np.sin(3*x1), x2+np.sin(10*x2), np.sin(20*x3)*(x3-1)**2]
339
+ >>>
340
+ >>> x = np.zeros(4)
341
+ >>> res = _differentiate(f, x, preserve_shape=True)
342
+ >>> shapes
343
+ [(4,), (4, 8), (4, 2), (4, 2), (4, 2), (4, 2)]
344
+
345
+ Here, the shape of ``x`` is ``(4,)``. With ``preserve_shape=True``, the
346
+ function may be called with argument ``x`` of shape ``(4,)`` or ``(4, n)``,
347
+ and this is what we observe.
348
+
349
+ """
350
+ # TODO (followup):
351
+ # - investigate behavior at saddle points
352
+ # - array initial_step / step_factor?
353
+ # - multivariate functions?
354
+
355
+ res = _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
356
+ step_factor, step_direction, preserve_shape, callback)
357
+ (func, x, args, atol, rtol, maxiter, order,
358
+ h0, fac, hdir, preserve_shape, callback) = res
359
+
360
+ # Initialization
361
+ # Since f(x) (no step) is not needed for central differences, it may be
362
+ # possible to eliminate this function evaluation. However, it's useful for
363
+ # input validation and standardization, and everything else is designed to
364
+ # reduce function calls, so let's keep it simple.
365
+ temp = eim._initialize(func, (x,), args, preserve_shape=preserve_shape)
366
+ func, xs, fs, args, shape, dtype, xp = temp
367
+ x, f = xs[0], fs[0]
368
+ df = np.full_like(f, np.nan)
369
+ # Ideally we'd broadcast the shape of `hdir` in `_elementwise_algo_init`, but
370
+ # it's simpler to do it here than to generalize `_elementwise_algo_init` further.
371
+ # `hdir` and `x` are already broadcasted in `_differentiate_iv`, so we know
372
+ # that `hdir` can be broadcasted to the final shape.
373
+ hdir = np.broadcast_to(hdir, shape).flatten()
374
+
375
+ status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
376
+ nit, nfev = 0, 1 # one function evaluation performed above
377
+ # Boolean indices of left, central, right, and (all) one-sided steps
378
+ il = hdir < 0
379
+ ic = hdir == 0
380
+ ir = hdir > 0
381
+ io = il | ir
382
+
383
+ # Most of these attributes are reasonably obvious, but:
384
+ # - `fs` holds all the function values of all active `x`. The zeroth
385
+ # axis corresponds with active points `x`, the first axis corresponds
386
+ # with the different steps (in the order described in
387
+ # `_differentiate_weights`).
388
+ # - `terms` (which could probably use a better name) is half the `order`,
389
+ # which is always even.
390
+ work = _RichResult(x=x, df=df, fs=f[:, np.newaxis], error=np.nan, h=h0,
391
+ df_last=np.nan, error_last=np.nan, h0=h0, fac=fac,
392
+ atol=atol, rtol=rtol, nit=nit, nfev=nfev,
393
+ status=status, dtype=dtype, terms=(order+1)//2,
394
+ hdir=hdir, il=il, ic=ic, ir=ir, io=io)
395
+ # This is the correspondence between terms in the `work` object and the
396
+ # final result. In this case, the mapping is trivial. Note that `success`
397
+ # is prepended automatically.
398
+ res_work_pairs = [('status', 'status'), ('df', 'df'), ('error', 'error'),
399
+ ('nit', 'nit'), ('nfev', 'nfev'), ('x', 'x')]
400
+
401
+ def pre_func_eval(work):
402
+ """Determine the abscissae at which the function needs to be evaluated.
403
+
404
+ See `_differentiate_weights` for a description of the stencil (pattern
405
+ of the abscissae).
406
+
407
+ In the first iteration, there is only one stored function value in
408
+ `work.fs`, `f(x)`, so we need to evaluate at `order` new points. In
409
+ subsequent iterations, we evaluate at two new points. Note that
410
+ `work.x` is always flattened into a 1D array after broadcasting with
411
+ all `args`, so we add a new axis at the end and evaluate all points
412
+ in one call to the function.
413
+
414
+ For improvement:
415
+ - Consider measuring the step size actually taken, since `(x + h) - x`
416
+ is not identically equal to `h` with floating point arithmetic.
417
+ - Adjust the step size automatically if `x` is too big to resolve the
418
+ step.
419
+ - We could probably save some work if there are no central difference
420
+ steps or no one-sided steps.
421
+ """
422
+ n = work.terms # half the order
423
+ h = work.h # step size
424
+ c = work.fac # step reduction factor
425
+ d = c**0.5 # square root of step reduction factor (one-sided stencil)
426
+ # Note - no need to be careful about dtypes until we allocate `x_eval`
427
+
428
+ if work.nit == 0:
429
+ hc = h / c**np.arange(n)
430
+ hc = np.concatenate((-hc[::-1], hc))
431
+ else:
432
+ hc = np.asarray([-h, h]) / c**(n-1)
433
+
434
+ if work.nit == 0:
435
+ hr = h / d**np.arange(2*n)
436
+ else:
437
+ hr = np.asarray([h, h/d]) / c**(n-1)
438
+
439
+ n_new = 2*n if work.nit == 0 else 2 # number of new abscissae
440
+ x_eval = np.zeros((len(work.hdir), n_new), dtype=work.dtype)
441
+ il, ic, ir = work.il, work.ic, work.ir
442
+ x_eval[ir] = work.x[ir, np.newaxis] + hr
443
+ x_eval[ic] = work.x[ic, np.newaxis] + hc
444
+ x_eval[il] = work.x[il, np.newaxis] - hr
445
+ return x_eval
446
+
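+ # A standalone sketch (illustration only) of the first-iteration central
+ # stencil built in `pre_func_eval` above, assuming order 8 (n = 4),
+ # h = 0.5, and fac = 2:
+ # import numpy as np
+ # n, h, c = 4, 0.5, 2.0
+ # hc = h / c**np.arange(n)             # [0.5, 0.25, 0.125, 0.0625]
+ # hc = np.concatenate((-hc[::-1], hc)) # symmetric about x; x itself excluded
+ # print(hc)  # [-0.0625 -0.125 -0.25 -0.5  0.5  0.25  0.125  0.0625]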
447
+ def post_func_eval(x, f, work):
448
+ """ Estimate the derivative and error from the function evaluations
449
+
450
+ As in `pre_func_eval`: in the first iteration, there is only one stored
451
+ function value in `work.fs`, `f(x)`, so we need to add the `order` new
452
+ points. In subsequent iterations, we add two new points. The tricky
453
+ part is getting the order to match that of the weights, which is
454
+ described in `_differentiate_weights`.
455
+
456
+ For improvement:
457
+ - Change the order of the weights (and steps in `pre_func_eval`) to
458
+ simplify `work_fc` concatenation and eliminate `fc` concatenation.
459
+ - It would be simple to do one-step Richardson extrapolation with `df`
460
+ and `df_last` to increase the order of the estimate and/or improve
461
+ the error estimate.
462
+ - Process the function evaluations in a more numerically favorable
463
+ way. For instance, combining the pairs of central difference evals
464
+ into a second-order approximation and using Richardson extrapolation
465
+ to produce a higher order approximation seemed to retain accuracy up
466
+ to very high order.
467
+ - Alternatively, we could use `polyfit` like Jacobi. An advantage of
468
+ fitting polynomial to more points than necessary is improved noise
469
+ tolerance.
470
+ """
471
+ n = work.terms
472
+ n_new = n if work.nit == 0 else 1
473
+ il, ic, io = work.il, work.ic, work.io
474
+
475
+ # Central difference
476
+ # `work_fc` is *all* the points at which the function has been evaluated
477
+ # `fc` is the points we're using *this iteration* to produce the estimate
478
+ work_fc = (f[ic, :n_new], work.fs[ic, :], f[ic, -n_new:])
479
+ work_fc = np.concatenate(work_fc, axis=-1)
480
+ if work.nit == 0:
481
+ fc = work_fc
482
+ else:
483
+ fc = (work_fc[:, :n], work_fc[:, n:n+1], work_fc[:, -n:])
484
+ fc = np.concatenate(fc, axis=-1)
485
+
486
+ # One-sided difference
487
+ work_fo = np.concatenate((work.fs[io, :], f[io, :]), axis=-1)
488
+ if work.nit == 0:
489
+ fo = work_fo
490
+ else:
491
+ fo = np.concatenate((work_fo[:, 0:1], work_fo[:, -2*n:]), axis=-1)
492
+
493
+ work.fs = np.zeros((len(ic), work.fs.shape[-1] + 2*n_new))
494
+ work.fs[ic] = work_fc
495
+ work.fs[io] = work_fo
496
+
497
+ wc, wo = _differentiate_weights(work, n)
498
+ work.df_last = work.df.copy()
499
+ work.df[ic] = fc @ wc / work.h
500
+ work.df[io] = fo @ wo / work.h
501
+ work.df[il] *= -1
502
+
503
+ work.h /= work.fac
504
+ work.error_last = work.error
505
+ # Simple error estimate - the difference in derivative estimates between
506
+ # this iteration and the last. This is typically conservative because if
507
+ # convergence has begun, the true error is much closer to the difference
508
+ # between the current estimate and the *next* error estimate. However,
509
+ # we could use Richardson extrapolation to produce an error estimate that
510
+ # is one order higher, and take the difference between that and
511
+ # `work.df` (which would just be a constant factor that depends on `fac`).
512
+ work.error = abs(work.df - work.df_last)
513
+
514
+ def check_termination(work):
515
+ """Terminate due to convergence, non-finite values, or error increase"""
516
+ stop = np.zeros_like(work.df).astype(bool)
517
+
518
+ i = work.error < work.atol + work.rtol*abs(work.df)
519
+ work.status[i] = eim._ECONVERGED
520
+ stop[i] = True
521
+
522
+ if work.nit > 0:
523
+ i = ~((np.isfinite(work.x) & np.isfinite(work.df)) | stop)
524
+ work.df[i], work.status[i] = np.nan, eim._EVALUEERR
525
+ stop[i] = True
526
+
527
+ # With infinite precision, there is a step size below which
528
+ # all smaller step sizes will reduce the error. But in floating point
529
+ # arithmetic, catastrophic cancellation will begin to cause the error
530
+ # to increase again. This heuristic tries to avoid step sizes that are
531
+ # too small. There may be more theoretically sound approaches for
532
+ # detecting a step size that minimizes the total error, but this
533
+ # heuristic seems simple and effective.
534
+ i = (work.error > work.error_last*10) & ~stop
535
+ work.status[i] = _EERRORINCREASE
536
+ stop[i] = True
537
+
538
+ return stop
539
+
540
+ def post_termination_check(work):
541
+ return
542
+
543
+ def customize_result(res, shape):
544
+ return shape
545
+
546
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
547
+ pre_func_eval, post_func_eval, check_termination,
548
+ post_termination_check, customize_result, res_work_pairs,
549
+ xp, preserve_shape)
550
+
551
+
552
+ def _differentiate_weights(work, n):
553
+ # This produces the weights of the finite difference formula for a given
554
+ # stencil. In experiments, use of a second-order central difference formula
555
+ # with Richardson extrapolation was more accurate numerically, but it was
556
+ # more complicated, and it would have become even more complicated when
557
+ # adding support for one-sided differences. However, now that all the
558
+ # function evaluation values are stored, they can be processed in whatever
559
+ # way is desired to produce the derivative estimate. We leave alternative
560
+ # approaches to future work. To be more self-contained, here is the theory
561
+ # for deriving the weights below.
562
+ #
563
+ # Recall that the Taylor expansion of a univariate, scalar-valued function
564
+ # about a point `x` may be expressed as:
565
+ # f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
566
+ # Suppose we evaluate f(x), f(x+h), and f(x-h). We have:
567
+ # f(x) = f(x)
568
+ # f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
569
+ # f(x - h) = f(x) - f'(x)*h + f''(x)/2!*h**2 + O(h**3)
570
+ # We can solve for weights `wi` such that:
571
+ # w1*f(x) = w1*(f(x))
572
+ # + w2*f(x + h) = w2*(f(x) + f'(x)*h + f''(x)/2!*h**2) + O(h**3)
573
+ # + w3*f(x - h) = w3*(f(x) - f'(x)*h + f''(x)/2!*h**2) + O(h**3)
574
+ # = 0 + f'(x)*h + 0 + O(h**3)
575
+ # Then
576
+ # f'(x) ~ (w1*f(x) + w2*f(x+h) + w3*f(x-h))/h
577
+ # is a finite difference derivative approximation with error O(h**2),
578
+ # and so it is said to be a "second-order" approximation. Under certain
579
+ # conditions (e.g. well-behaved function, `h` sufficiently small), the
580
+ # error in the approximation will decrease with h**2; that is, if `h` is
581
+ # reduced by a factor of 2, the error is reduced by a factor of 4.
582
+ #
583
+ # By default, we use eighth-order formulae. Our central-difference formula
584
+ # uses abscissae:
585
+ # x-h/c**3, x-h/c**2, x-h/c, x-h, x, x+h, x+h/c, x+h/c**2, x+h/c**3
586
+ # where `c` is the step factor. (Typically, the step factor is greater than
587
+ # one, so the outermost points - as written above - are actually closest to
588
+ # `x`.) This "stencil" is chosen so that each iteration, the step can be
589
+ # reduced by the factor `c`, and most of the function evaluations can be
590
+ # reused with the new step size. For example, in the next iteration, we
591
+ # will have:
592
+ # x-h/c**4, x-h/c**3, x-h/c**2, x-h/c, x, x+h/c, x+h/c**2, x+h/c**3, x+h/c**4
593
+ # We do not reuse `x-h` and `x+h` for the new derivative estimate.
594
+ # While this would increase the order of the formula and thus the
595
+ # theoretical convergence rate, it is also less stable numerically.
596
+ # (As noted above, there are other ways of processing the values that are
597
+ # more stable. Thus, even now we store `f(x-h)` and `f(x+h)` in `work.fs`
598
+ # to simplify future development of this sort of improvement.)
599
+ #
600
+ # The (right) one-sided formula is produced similarly using abscissae
601
+ # x, x+h, x+h/d, x+h/d**2, ..., x+h/d**6, x+h/d**7, x+h/d**7
602
+ # where `d` is the square root of `c`. (The left one-sided formula simply
603
+ # uses -h.) When the step size is reduced by factor `c = d**2`, we have
604
+ # abscissae:
605
+ # x, x+h/d**2, x+h/d**3..., x+h/d**8, x+h/d**9, x+h/d**9
606
+ # `d` is chosen as the square root of `c` so that the rate of the step-size
607
+ # reduction is the same per iteration as in the central difference case.
608
+ # Note that because the central difference formulas are inherently of even
609
+ # order, for simplicity, we use only even-order formulas for one-sided
610
+ # differences, too.
611
+
612
+ # It's possible for the user to specify `fac` in, say, double precision but
613
+ # `x` and `args` in single precision. `fac` gets converted to single
614
+ # precision, but we should always use double precision for the intermediate
615
+ # calculations here to avoid additional error in the weights.
616
+ fac = work.fac.astype(np.float64)
617
+
618
+ # Note that if the user switches back to floating point precision with
619
+ # `x` and `args`, then `fac` will not necessarily equal the (lower
620
+ # precision) cached `_differentiate_weights.fac`, and the weights will
621
+ # need to be recalculated. This could be fixed, but it's late, and of
622
+ # low consequence.
623
+ if fac != _differentiate_weights.fac:
624
+ _differentiate_weights.central = []
625
+ _differentiate_weights.right = []
626
+ _differentiate_weights.fac = fac
627
+
628
+ if len(_differentiate_weights.central) != 2*n + 1:
629
+ # Central difference weights. Consider refactoring this; it could
630
+ # probably be more compact.
631
+ i = np.arange(-n, n + 1)
632
+ p = np.abs(i) - 1. # center point has power `p` -1, but sign `s` is 0
633
+ s = np.sign(i)
634
+
635
+ h = s / fac ** p
636
+ A = np.vander(h, increasing=True).T
637
+ b = np.zeros(2*n + 1)
638
+ b[1] = 1
639
+ weights = np.linalg.solve(A, b)
640
+
641
+ # Enforce identities to improve accuracy
642
+ weights[n] = 0
643
+ for i in range(n):
644
+ weights[-i-1] = -weights[i]
645
+
646
+ # Cache the weights. We only need to calculate them once unless
647
+ # the step factor changes.
648
+ _differentiate_weights.central = weights
649
+
650
+ # One-sided difference weights. The left one-sided weights (with
651
+ # negative steps) are simply the negative of the right one-sided
652
+ # weights, so no need to compute them separately.
653
+ i = np.arange(2*n + 1)
654
+ p = i - 1.
655
+ s = np.sign(i)
656
+
657
+ h = s / np.sqrt(fac) ** p
658
+ A = np.vander(h, increasing=True).T
659
+ b = np.zeros(2 * n + 1)
660
+ b[1] = 1
661
+ weights = np.linalg.solve(A, b)
662
+
663
+ _differentiate_weights.right = weights
664
+
665
+ return (_differentiate_weights.central.astype(work.dtype, copy=False),
666
+ _differentiate_weights.right.astype(work.dtype, copy=False))
667
+ _differentiate_weights.central = []
668
+ _differentiate_weights.right = []
669
+ _differentiate_weights.fac = None
670
+
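+ # A worked sketch (illustration only) of the weight derivation described in
+ # the comment above, for the classic 3-point central difference with
+ # offsets -h, 0, +h, i.e. step multiples s = [-1, 0, 1]:
+ # import numpy as np
+ # s = np.array([-1.0, 0.0, 1.0])
+ # A = np.vander(s, increasing=True).T  # row i holds s**i
+ # b = np.zeros(3)
+ # b[1] = 1.0                           # isolate the f'(x)*h term
+ # w = np.linalg.solve(A, b)
+ # print(w)  # [-0.5  0.   0.5] -> f'(x) ~ (f(x+h) - f(x-h)) / (2*h)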
671
+
672
+ def _jacobian(func, x, *, atol=None, rtol=None, maxiter=10,
673
+ order=8, initial_step=0.5, step_factor=2.0):
674
+ r"""Evaluate the Jacobian of a function numerically.
675
+
676
+ Parameters
677
+ ----------
678
+ func : callable
679
+ The function whose Jacobian is desired. The signature must be::
680
+
681
+ func(x: ndarray) -> ndarray
682
+
683
+ where each element of ``x`` is a finite real. If the function to be
684
+ differentiated accepts additional arguments, wrap it (e.g. using
685
+ `functools.partial` or ``lambda``) and pass the wrapped callable
686
+ into `_jacobian`. See Notes regarding vectorization and the dimensionality
687
+ of the input and output.
688
+ x : array_like
689
+ Points at which to evaluate the Jacobian. Must have at least one dimension.
690
+ See Notes regarding the dimensionality and vectorization.
691
+ atol, rtol : float, optional
692
+ Absolute and relative tolerances for the stopping condition: iteration
693
+ will stop for each element of the Jacobian when
694
+ ``res.error < atol + rtol * abs(res.df)``. The default `atol` is the
695
+ smallest normal number of the appropriate dtype, and the default `rtol`
696
+ is the square root of the precision of the appropriate dtype.
697
+ order : int, default: 8
698
+ The (positive integer) order of the finite difference formula to be
699
+ used. Odd integers will be rounded up to the next even integer.
700
+ initial_step : float, default: 0.5
701
+ The (absolute) initial step size for the finite difference derivative
702
+ approximation.
703
+ step_factor : float, default: 2.0
704
+ The factor by which the step size is *reduced* in each iteration; i.e.
705
+ the step size in iteration 1 is ``initial_step/step_factor``. If
706
+ ``step_factor < 1``, subsequent steps will be greater than the initial
707
+ step; this may be useful if steps smaller than some threshold are
708
+ undesirable (e.g. due to subtractive cancellation error).
709
+ maxiter : int, default: 10
710
+ The maximum number of iterations of the algorithm to perform.
711
+
712
+ Returns
713
+ -------
714
+ res : _RichResult
715
+ An instance of `scipy._lib._util._RichResult` with the following
716
+ attributes.
717
+
718
+ success : bool array
719
+ ``True`` when the algorithm terminated successfully (status ``0``).
720
+ status : int array
721
+ An integer representing the exit status of the algorithm.
722
+ ``0`` : The algorithm converged to the specified tolerances.
723
+ ``-1`` : The error estimate increased, so iteration was terminated.
724
+ ``-2`` : The maximum number of iterations was reached.
725
+ ``-3`` : A non-finite value was encountered.
726
+ ``-4`` : Iteration was terminated by `callback`.
727
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
728
+ df : float array
729
+ The Jacobian of `func` at `x`, if the algorithm terminated
730
+ successfully.
731
+ error : float array
732
+ An estimate of the error: the magnitude of the difference between
733
+ the current estimate of the derivative and the estimate in the
734
+ previous iteration.
735
+ nit : int array
736
+ The number of iterations performed.
737
+ nfev : int array
738
+ The number of points at which `func` was evaluated.
739
+ x : float array
740
+ The value at which the derivative of `func` was evaluated.
741
+
742
+ See Also
743
+ --------
744
+ _differentiate
745
+
746
+ Notes
747
+ -----
748
+ Suppose we wish to evaluate the Jacobian of a function
749
+ :math:`f: \mathbf{R^m} \rightarrow \mathbf{R^n}`, and assign to variables
750
+ ``m`` and ``n`` the positive integer values of :math:`m` and :math:`n`,
751
+ respectively. If we wish to evaluate the Jacobian at a single point,
752
+ then:
753
+
754
+ - argument `x` must be an array of shape ``(m,)``
755
+ - argument `func` must be vectorized to accept an array of shape ``(m, p)``.
756
+ The first axis represents the :math:`m` inputs of :math:`f`; the second
757
+ is for evaluating the function at multiple points in a single call.
758
+ - argument `func` must return an array of shape ``(n, p)``. The first
759
+ axis represents the :math:`n` outputs of :math:`f`; the second
760
+ is for the result of evaluating the function at multiple points.
761
+ - attribute ``df`` of the result object will be an array of shape ``(n, m)``,
762
+ the Jacobian.
763
+
764
+ This function is also vectorized in the sense that the Jacobian can be
765
+ evaluated at ``k`` points in a single call. In this case, `x` would be an
766
+ array of shape ``(m, k)``, `func` would accept an array of shape
767
+ ``(m, k, p)`` and return an array of shape ``(n, k, p)``, and the ``df``
768
+ attribute of the result would have shape ``(n, m, k)``.
769
+
770
+ References
771
+ ----------
772
+ .. [1] Jacobian matrix and determinant, *Wikipedia*,
773
+ https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant
774
+
775
+ Examples
776
+ --------
777
+ The Rosenbrock function maps from :math:`\mathbf{R}^m \rightarrow \mathbf{R}`;
778
+ the SciPy implementation `scipy.optimize.rosen` is vectorized to accept an
779
+ array of shape ``(m, p)`` and return an array of shape ``(p,)``. Suppose we wish
780
+ to evaluate the Jacobian (AKA the gradient because the function returns a scalar)
781
+ at ``[0.5, 0.5, 0.5]``.
782
+
783
+ >>> import numpy as np
784
+ >>> from scipy.optimize._differentiate import _jacobian as jacobian
785
+ >>> from scipy.optimize import rosen, rosen_der
786
+ >>> m = 3
787
+ >>> x = np.full(m, 0.5)
788
+ >>> res = jacobian(rosen, x)
789
+ >>> ref = rosen_der(x) # reference value of the gradient
790
+ >>> res.df, ref
791
+ (array([-51., -1., 50.]), array([-51., -1., 50.]))
792
+
793
+ As an example of a function with multiple outputs, consider Example 4
794
+ from [1]_.
795
+
796
+ >>> def f(x):
797
+ ... x1, x2, x3 = x
798
+ ... return [x1, 5*x3, 4*x2**2 - 2*x3, x3*np.sin(x1)]
799
+
800
+ The true Jacobian is given by:
801
+
802
+ >>> def df(x):
803
+ ... x1, x2, x3 = x
804
+ ... one = np.ones_like(x1)
805
+ ... return [[one, 0*one, 0*one],
806
+ ... [0*one, 0*one, 5*one],
807
+ ... [0*one, 8*x2, -2*one],
808
+ ... [x3*np.cos(x1), 0*one, np.sin(x1)]]
809
+
810
+ Evaluate the Jacobian at an arbitrary point.
811
+
812
+ >>> rng = np.random.default_rng(389252938452)
813
+ >>> x = rng.random(size=3)
814
+ >>> res = jacobian(f, x)
815
+ >>> ref = df(x)
816
+ >>> res.df.shape == (4, 3)
817
+ True
818
+ >>> np.allclose(res.df, ref)
819
+ True
820
+
821
+ Evaluate the Jacobian at 10 arbitrary points in a single call.
822
+
823
+ >>> x = rng.random(size=(3, 10))
824
+ >>> res = jacobian(f, x)
825
+ >>> ref = df(x)
826
+ >>> res.df.shape == (4, 3, 10)
827
+ True
828
+ >>> np.allclose(res.df, ref)
829
+ True
830
+
831
+ """
832
+ x = np.asarray(x)
833
+ int_dtype = np.issubdtype(x.dtype, np.integer)
834
+ x0 = np.asarray(x, dtype=float) if int_dtype else x
835
+
836
+ if x0.ndim < 1:
837
+ message = "Argument `x` must be at least 1-D."
838
+ raise ValueError(message)
839
+
840
+ m = x0.shape[0]
841
+ i = np.arange(m)
842
+
843
+ def wrapped(x):
844
+ p = () if x.ndim == x0.ndim else (x.shape[-1],) # number of abscissae
845
+ new_dims = (1,) if x.ndim == x0.ndim else (1, -1)
846
+ new_shape = (m, m) + x0.shape[1:] + p
847
+ xph = np.expand_dims(x0, new_dims)
848
+ xph = np.broadcast_to(xph, new_shape).copy()
849
+ xph[i, i] = x
850
+ return func(xph)
851
+
852
+ res = _differentiate(wrapped, x, atol=atol, rtol=rtol,
853
+ maxiter=maxiter, order=order, initial_step=initial_step,
854
+ step_factor=step_factor, preserve_shape=True)
855
+ del res.x # the user knows `x`, and the way it gets broadcasted is meaningless here
856
+ return res
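+ # A standalone sketch (illustration only) of the coordinate-perturbation
+ # trick in `wrapped` above: broadcasting `x0` to an (m, m) grid and writing
+ # the abscissae onto the diagonal means column j varies only in coordinate j.
+ # import numpy as np
+ # x0 = np.array([1.0, 2.0, 3.0])
+ # m = len(x0)
+ # xph = np.broadcast_to(x0[:, np.newaxis], (m, m)).copy()
+ # i = np.arange(m)
+ # xph[i, i] = x0 + 0.1     # step each coordinate in turn
+ # print(xph)               # column j equals x0 with only coordinate j perturbed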
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so ADDED
Binary file (43.5 kB). View file
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py ADDED
@@ -0,0 +1,278 @@
1
+ from __future__ import annotations
2
+ from typing import ( # noqa: UP035
3
+ Any, Callable, Iterable, TYPE_CHECKING
4
+ )
5
+
6
+ import numpy as np
7
+ from scipy.optimize import OptimizeResult
8
+ from ._constraints import old_bound_to_new, Bounds
9
+ from ._direct import direct as _direct # type: ignore
10
+
11
+ if TYPE_CHECKING:
12
+ import numpy.typing as npt
13
+
14
+ __all__ = ['direct']
15
+
16
+ ERROR_MESSAGES = (
17
+ "Number of function evaluations done is larger than maxfun={}",
18
+ "Number of iterations is larger than maxiter={}",
19
+ "u[i] < l[i] for some i",
20
+ "maxfun is too large",
21
+ "Initialization failed",
22
+ "There was an error in the creation of the sample points",
23
+ "An error occurred while the function was sampled",
24
+ "Maximum number of levels has been reached.",
25
+ "Forced stop",
26
+ "Invalid arguments",
27
+ "Out of memory",
28
+ )
29
+
30
+ SUCCESS_MESSAGES = (
31
+ ("The best function value found is within a relative error={} "
32
+ "of the (known) global optimum f_min"),
33
+ ("The volume of the hyperrectangle containing the lowest function value "
34
+ "found is below vol_tol={}"),
35
+ ("The side length measure of the hyperrectangle containing the lowest "
36
+ "function value found is below len_tol={}"),
37
+ )
38
+
39
+
40
+ def direct(
41
+ func: Callable[[npt.ArrayLike, tuple[Any]], float],
42
+ bounds: Iterable | Bounds,
43
+ *,
44
+ args: tuple = (),
45
+ eps: float = 1e-4,
46
+ maxfun: int | None = None,
47
+ maxiter: int = 1000,
48
+ locally_biased: bool = True,
49
+ f_min: float = -np.inf,
50
+ f_min_rtol: float = 1e-4,
51
+ vol_tol: float = 1e-16,
52
+ len_tol: float = 1e-6,
53
+ callback: Callable[[npt.ArrayLike], None] | None = None
54
+ ) -> OptimizeResult:
55
+ """
56
+ Finds the global minimum of a function using the
57
+ DIRECT algorithm.
58
+
59
+ Parameters
60
+ ----------
61
+ func : callable
62
+ The objective function to be minimized.
63
+ ``func(x, *args) -> float``
64
+ where ``x`` is an 1-D array with shape (n,) and ``args`` is a tuple of
65
+ the fixed parameters needed to completely specify the function.
66
+ bounds : sequence or `Bounds`
67
+ Bounds for variables. There are two ways to specify the bounds:
68
+
69
+ 1. Instance of `Bounds` class.
70
+ 2. ``(min, max)`` pairs for each element in ``x``.
71
+
72
+ args : tuple, optional
73
+ Any additional fixed parameters needed to
74
+ completely specify the objective function.
75
+ eps : float, optional
76
+ Minimal required difference of the objective function values
77
+ between the current best hyperrectangle and the next potentially
78
+ optimal hyperrectangle to be divided. In consequence, `eps` serves as a
79
+ tradeoff between local and global search: the smaller, the more local
80
+ the search becomes. Default is 1e-4.
81
+ maxfun : int or None, optional
82
+ Approximate upper bound on objective function evaluations.
83
+ If `None`, will be automatically set to ``1000 * N`` where ``N``
84
+ represents the number of dimensions. Will be capped if necessary to
85
+ limit DIRECT's RAM usage to approximately 1 GiB. This will only occur for very
86
+ high dimensional problems and excessive `maxfun`. Default is `None`.
87
+ maxiter : int, optional
88
+ Maximum number of iterations. Default is 1000.
89
+ locally_biased : bool, optional
90
+ If `True` (default), use the locally biased variant of the
91
+ algorithm known as DIRECT_L. If `False`, use the original unbiased
92
+ DIRECT algorithm. For hard problems with many local minima,
93
+ `False` is recommended.
94
+ f_min : float, optional
95
+ Function value of the global optimum. Set this value only if the
96
+ global optimum is known. Default is ``-np.inf``, so that this
97
+ termination criterion is deactivated.
98
+ f_min_rtol : float, optional
99
+ Terminate the optimization once the relative error between the
100
+ current best minimum `f` and the supplied global minimum `f_min`
101
+ is smaller than `f_min_rtol`. This parameter is only used if
102
+ `f_min` is also set. Must lie between 0 and 1. Default is 1e-4.
103
+ vol_tol : float, optional
104
+ Terminate the optimization once the volume of the hyperrectangle
105
+ containing the lowest function value is smaller than `vol_tol`
106
+ of the complete search space. Must lie between 0 and 1.
107
+ Default is 1e-16.
108
+ len_tol : float, optional
109
+ If `locally_biased=True`, terminate the optimization once half of
110
+ the normalized maximal side length of the hyperrectangle containing
111
+ the lowest function value is smaller than `len_tol`.
112
+ If `locally_biased=False`, terminate the optimization once half of
113
+ the normalized diagonal of the hyperrectangle containing the lowest
114
+ function value is smaller than `len_tol`. Must lie between 0 and 1.
115
+ Default is 1e-6.
116
+ callback : callable, optional
117
+ A callback function with signature ``callback(xk)`` where ``xk``
118
+ represents the best function value found so far.
119
+
120
+ Returns
121
+ -------
122
+ res : OptimizeResult
123
+ The optimization result represented as a ``OptimizeResult`` object.
124
+ Important attributes are: ``x`` the solution array, ``success`` a
125
+ Boolean flag indicating if the optimizer exited successfully and
126
+ ``message`` which describes the cause of the termination. See
127
+ `OptimizeResult` for a description of other attributes.
128
+
129
+ Notes
130
+ -----
131
+ DIviding RECTangles (DIRECT) is a deterministic global
132
+ optimization algorithm capable of minimizing a black box function with
133
+ its variables subject to lower and upper bound constraints by sampling
134
+ potential solutions in the search space [1]_. The algorithm starts by
135
+ normalising the search space to an n-dimensional unit hypercube.
136
+ It samples the function at the center of this hypercube and at 2n
137
+ (n is the number of variables) more points, 2 in each coordinate
138
+ direction. Using these function values, DIRECT then divides the
139
+ domain into hyperrectangles, each having exactly one of the sampling
140
+ points as its center. In each iteration, DIRECT chooses, using the `eps`
141
+ parameter which defaults to 1e-4, some of the existing hyperrectangles
142
+ to be further divided. This division process continues until either the
143
+ maximum number of iterations or maximum function evaluations allowed
144
+ are exceeded, or the hyperrectangle containing the minimal value found
145
+ so far becomes small enough. If `f_min` is specified, the optimization
146
+ will stop once this function value is reached within a relative tolerance.
147
+ The locally biased variant of DIRECT (originally called DIRECT_L) [2]_ is
148
+ used by default. It makes the search more locally biased and more
149
+ efficient for cases with only a few local minima.
150
+
151
+ A note about termination criteria: `vol_tol` refers to the volume of the
152
+ hyperrectangle containing the lowest function value found so far. This
153
+ volume decreases exponentially with increasing dimensionality of the
154
+ problem. Therefore `vol_tol` should be decreased to avoid premature
155
+ termination of the algorithm for higher dimensions. This does not hold
156
+ for `len_tol`: it refers either to half of the maximal side length
157
+ (for ``locally_biased=True``) or half of the diagonal of the
158
+ hyperrectangle (for ``locally_biased=False``).
159
+
160
+ This code is based on the DIRECT 2.0.4 Fortran code by Gablonsky et al. at
161
+ https://ctk.math.ncsu.edu/SOFTWARE/DIRECTv204.tar.gz .
162
+ This original version was initially converted via f2c and then cleaned up
163
+ and reorganized by Steven G. Johnson, August 2007, for the NLopt project.
164
+ The `direct` function wraps the C implementation.
165
+
166
+ .. versionadded:: 1.9.0
167
+
168
+ References
169
+ ----------
170
+ .. [1] Jones, D.R., Perttunen, C.D. & Stuckman, B.E. Lipschitzian
171
+ optimization without the Lipschitz constant. J Optim Theory Appl
172
+ 79, 157-181 (1993).
173
+ .. [2] Gablonsky, J., Kelley, C. A Locally-Biased form of the DIRECT
174
+ Algorithm. Journal of Global Optimization 21, 27-37 (2001).
175
+
176
+ Examples
177
+ --------
178
+ The following example is a 2-D problem with four local minima: minimizing
179
+ the Styblinski-Tang function
180
+ (https://en.wikipedia.org/wiki/Test_functions_for_optimization).
181
+
182
+ >>> from scipy.optimize import direct, Bounds
183
+ >>> def styblinski_tang(pos):
184
+ ... x, y = pos
185
+ ... return 0.5 * (x**4 - 16*x**2 + 5*x + y**4 - 16*y**2 + 5*y)
186
+ >>> bounds = Bounds([-4., -4.], [4., 4.])
187
+ >>> result = direct(styblinski_tang, bounds)
188
+ >>> result.x, result.fun, result.nfev
189
+ array([-2.90321597, -2.90321597]), -78.3323279095383, 2011
190
+
191
+ The correct global minimum was found but with a huge number of function
192
+ evaluations (2011). Loosening the termination tolerances `vol_tol` and
193
+ `len_tol` can be used to stop DIRECT earlier.
194
+
195
+ >>> result = direct(styblinski_tang, bounds, len_tol=1e-3)
196
+ >>> result.x, result.fun, result.nfev
197
+ array([-2.9044353, -2.9044353]), -78.33230330754142, 207
198
+
199
+ """
200
+ # convert bounds to new Bounds class if necessary
201
+ if not isinstance(bounds, Bounds):
202
+ if isinstance(bounds, list) or isinstance(bounds, tuple):
203
+ lb, ub = old_bound_to_new(bounds)
204
+ bounds = Bounds(lb, ub)
205
+ else:
206
+ message = ("bounds must be a sequence or "
207
+ "instance of Bounds class")
208
+ raise ValueError(message)
209
+
210
+ lb = np.ascontiguousarray(bounds.lb, dtype=np.float64)
211
+ ub = np.ascontiguousarray(bounds.ub, dtype=np.float64)
212
+
213
+ # validate bounds
214
+ # check that lower bounds are smaller than upper bounds
215
+ if not np.all(lb < ub):
216
+ raise ValueError('Bounds are not consistent min < max')
217
+ # check for infs
218
+ if (np.any(np.isinf(lb)) or np.any(np.isinf(ub))):
219
+ raise ValueError("Bounds must not be inf.")
220
+
221
+ # validate tolerances
222
+ if (vol_tol < 0 or vol_tol > 1):
223
+ raise ValueError("vol_tol must be between 0 and 1.")
224
+ if (len_tol < 0 or len_tol > 1):
225
+ raise ValueError("len_tol must be between 0 and 1.")
226
+ if (f_min_rtol < 0 or f_min_rtol > 1):
227
+ raise ValueError("f_min_rtol must be between 0 and 1.")
228
+
229
+ # validate maxfun and maxiter
230
+ if maxfun is None:
231
+ maxfun = 1000 * lb.shape[0]
232
+ if not isinstance(maxfun, int):
233
+ raise ValueError("maxfun must be of type int.")
234
+ if maxfun < 0:
235
+ raise ValueError("maxfun must be > 0.")
236
+ if not isinstance(maxiter, int):
237
+ raise ValueError("maxiter must be of type int.")
238
+ if maxiter < 0:
239
+ raise ValueError("maxiter must be > 0.")
240
+
241
+ # validate boolean parameters
242
+ if not isinstance(locally_biased, bool):
243
+ raise ValueError("locally_biased must be True or False.")
244
+
245
+ def _func_wrap(x, args=None):
246
+ x = np.asarray(x)
247
+ if args is None:
248
+ f = func(x)
249
+ else:
250
+ f = func(x, *args)
251
+ # always return a float
252
+ return np.asarray(f).item()
253
+
254
+ # TODO: fix disp argument
255
+ x, fun, ret_code, nfev, nit = _direct(
256
+ _func_wrap,
257
+ np.asarray(lb), np.asarray(ub),
258
+ args,
259
+ False, eps, maxfun, maxiter,
260
+ locally_biased,
261
+ f_min, f_min_rtol,
262
+ vol_tol, len_tol, callback
263
+ )
264
+
265
+ format_val = (maxfun, maxiter, f_min_rtol, vol_tol, len_tol)
266
+ if ret_code > 2:
267
+ message = SUCCESS_MESSAGES[ret_code - 3].format(
268
+ format_val[ret_code - 1])
269
+ elif 0 < ret_code <= 2:
270
+ message = ERROR_MESSAGES[ret_code - 1].format(format_val[ret_code - 1])
271
+ elif 0 > ret_code > -100:
272
+ message = ERROR_MESSAGES[abs(ret_code) + 1]
273
+ else:
274
+ message = ERROR_MESSAGES[ret_code + 99]
275
+
276
+ return OptimizeResult(x=np.asarray(x), fun=fun, status=ret_code,
277
+ success=ret_code > 2, message=message,
278
+ nfev=nfev, nit=nit)
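A minimal usage sketch of the options validated above (argument values are illustrative only; ``sphere`` is a hypothetical test function, not part of the module):
>>> import numpy as np
>>> from scipy.optimize import direct, Bounds
>>> def sphere(x):
...     return float(np.sum(x**2))
>>> bounds = Bounds([-2., -2.], [2., 2.])
>>> # not-locally-biased DIRECT with an explicit volume tolerance
>>> res = direct(sphere, bounds, locally_biased=False, vol_tol=1e-10)
>>> res.status, res.message  # doctest: +SKIP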
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py ADDED
@@ -0,0 +1,732 @@
1
+ # Dual Annealing implementation.
2
+ # Copyright (c) 2018 Sylvain Gubian <[email protected]>,
3
+ # Yang Xiang <[email protected]>
4
+ # Author: Sylvain Gubian, Yang Xiang, PMP S.A.
5
+
6
+ """
7
+ A Dual Annealing global optimization algorithm
8
+ """
9
+
10
+ import numpy as np
11
+ from scipy.optimize import OptimizeResult
12
+ from scipy.optimize import minimize, Bounds
13
+ from scipy.special import gammaln
14
+ from scipy._lib._util import check_random_state
15
+ from scipy.optimize._constraints import new_bounds_to_old
16
+
17
+ __all__ = ['dual_annealing']
18
+
19
+
20
+ class VisitingDistribution:
21
+ """
22
+ Class used to generate new coordinates based on the distorted
23
+ Cauchy-Lorentz distribution. Depending on the steps within the strategy
24
+ chain, the class implements the strategy for generating new location
25
+ changes.
26
+
27
+ Parameters
28
+ ----------
29
+ lb : array_like
30
+ A 1-D NumPy ndarray containing lower bounds of the generated
31
+ components. Neither NaN nor inf is allowed.
32
+ ub : array_like
33
+ A 1-D NumPy ndarray containing upper bounds for the generated
34
+ components. Neither NaN nor inf is allowed.
35
+ visiting_param : float
36
+ Parameter for visiting distribution. Default value is 2.62.
37
+ Higher values give the visiting distribution a heavier tail; this
38
+ makes the algorithm jump to more distant regions.
39
+ The value range is (1, 3]. Its value is fixed for the life of the
40
+ object.
41
+ rand_gen : {`~numpy.random.RandomState`, `~numpy.random.Generator`}
42
+ A `~numpy.random.RandomState`, `~numpy.random.Generator` object
43
+ for using the current state of the created random generator container.
44
+
45
+ """
46
+ TAIL_LIMIT = 1.e8
47
+ MIN_VISIT_BOUND = 1.e-10
48
+
49
+ def __init__(self, lb, ub, visiting_param, rand_gen):
50
+ # if you wish to make _visiting_param adjustable during the life of
51
+ # the object then _factor2, _factor3, _factor5, _d1, _factor6 will
52
+ # have to be dynamically calculated in `visit_fn`. They're factored
53
+ # out here so they don't need to be recalculated all the time.
54
+ self._visiting_param = visiting_param
55
+ self.rand_gen = rand_gen
56
+ self.lower = lb
57
+ self.upper = ub
58
+ self.bound_range = ub - lb
59
+
60
+ # these are invariant numbers unless visiting_param changes
61
+ self._factor2 = np.exp((4.0 - self._visiting_param) * np.log(
62
+ self._visiting_param - 1.0))
63
+ self._factor3 = np.exp((2.0 - self._visiting_param) * np.log(2.0)
64
+ / (self._visiting_param - 1.0))
65
+ self._factor4_p = np.sqrt(np.pi) * self._factor2 / (self._factor3 * (
66
+ 3.0 - self._visiting_param))
67
+
68
+ self._factor5 = 1.0 / (self._visiting_param - 1.0) - 0.5
69
+ self._d1 = 2.0 - self._factor5
70
+ self._factor6 = np.pi * (1.0 - self._factor5) / np.sin(
71
+ np.pi * (1.0 - self._factor5)) / np.exp(gammaln(self._d1))
72
+
73
+ def visiting(self, x, step, temperature):
74
+ """ Based on the step in the strategy chain, new coordinates are
75
+ generated by changing all components is the same time or only
76
+ one of them, the new values are computed with visit_fn method
77
+ """
78
+ dim = x.size
79
+ if step < dim:
80
+ # Changing all coordinates with a new visiting value
81
+ visits = self.visit_fn(temperature, dim)
82
+ upper_sample, lower_sample = self.rand_gen.uniform(size=2)
83
+ visits[visits > self.TAIL_LIMIT] = self.TAIL_LIMIT * upper_sample
84
+ visits[visits < -self.TAIL_LIMIT] = -self.TAIL_LIMIT * lower_sample
85
+ x_visit = visits + x
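+ # The double fmod below wraps x_visit back into [lower, upper),
+ # i.e. bound violations are handled periodically (wrap-around).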
86
+ a = x_visit - self.lower
87
+ b = np.fmod(a, self.bound_range) + self.bound_range
88
+ x_visit = np.fmod(b, self.bound_range) + self.lower
89
+ x_visit[np.fabs(
90
+ x_visit - self.lower) < self.MIN_VISIT_BOUND] += 1.e-10
91
+ else:
92
+ # Changing only one coordinate at a time based on strategy
93
+ # chain step
94
+ x_visit = np.copy(x)
95
+ visit = self.visit_fn(temperature, 1)[0]
96
+ if visit > self.TAIL_LIMIT:
97
+ visit = self.TAIL_LIMIT * self.rand_gen.uniform()
98
+ elif visit < -self.TAIL_LIMIT:
99
+ visit = -self.TAIL_LIMIT * self.rand_gen.uniform()
100
+ index = step - dim
101
+ x_visit[index] = visit + x[index]
102
+ a = x_visit[index] - self.lower[index]
103
+ b = np.fmod(a, self.bound_range[index]) + self.bound_range[index]
104
+ x_visit[index] = np.fmod(b, self.bound_range[
105
+ index]) + self.lower[index]
106
+ if np.fabs(x_visit[index] - self.lower[
107
+ index]) < self.MIN_VISIT_BOUND:
108
+ x_visit[index] += self.MIN_VISIT_BOUND
109
+ return x_visit
110
+
111
+ def visit_fn(self, temperature, dim):
112
+ """ Formula Visita from p. 405 of reference [2] """
113
+ x, y = self.rand_gen.normal(size=(dim, 2)).T
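+ # A rescaled normal sample x divided by |y| ** ((q_v - 1) / (3 - q_v))
+ # below yields the heavy-tailed distorted Cauchy-Lorentz deviate.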
114
+
115
+ factor1 = np.exp(np.log(temperature) / (self._visiting_param - 1.0))
116
+ factor4 = self._factor4_p * factor1
117
+
118
+ # sigmax
119
+ x *= np.exp(-(self._visiting_param - 1.0) * np.log(
120
+ self._factor6 / factor4) / (3.0 - self._visiting_param))
121
+
122
+ den = np.exp((self._visiting_param - 1.0) * np.log(np.fabs(y)) /
123
+ (3.0 - self._visiting_param))
124
+
125
+ return x / den
126
+
127
+
128
+ class EnergyState:
129
+ """
130
+ Class used to record the energy state. At any time, it knows the
131
+ currently used coordinates and the most recent best location.
132
+
133
+ Parameters
134
+ ----------
135
+ lower : array_like
136
+ A 1-D NumPy ndarray containing lower bounds for generating the initial
137
+ random components in the `reset` method.
138
+ upper : array_like
139
+ A 1-D NumPy ndarray containing upper bounds for generating the initial
140
+ random components in the `reset` method.
141
+ Neither NaN nor inf is allowed.
142
+ callback : callable, ``callback(x, f, context)``, optional
143
+ A callback function which will be called for all minima found.
144
+ ``x`` and ``f`` are the coordinates and function value of the
145
+ latest minimum found, and `context` has value in [0, 1, 2]
146
+ """
147
+ # Maximum number of trials for generating a valid starting point
148
+ MAX_REINIT_COUNT = 1000
149
+
150
+ def __init__(self, lower, upper, callback=None):
151
+ self.ebest = None
152
+ self.current_energy = None
153
+ self.current_location = None
154
+ self.xbest = None
155
+ self.lower = lower
156
+ self.upper = upper
157
+ self.callback = callback
158
+
159
+ def reset(self, func_wrapper, rand_gen, x0=None):
160
+ """
161
+ Initialize the current location in the search domain. If `x0` is not
162
+ provided, a random location within the bounds is generated.
163
+ """
164
+ if x0 is None:
165
+ self.current_location = rand_gen.uniform(self.lower, self.upper,
166
+ size=len(self.lower))
167
+ else:
168
+ self.current_location = np.copy(x0)
169
+ init_error = True
170
+ reinit_counter = 0
171
+ while init_error:
172
+ self.current_energy = func_wrapper.fun(self.current_location)
173
+ if self.current_energy is None:
174
+ raise ValueError('Objective function is returning None')
175
+ if (not np.isfinite(self.current_energy) or np.isnan(
176
+ self.current_energy)):
177
+ if reinit_counter >= EnergyState.MAX_REINIT_COUNT:
178
+ init_error = False
179
+ message = (
180
+ 'Stopping algorithm because the function '
181
+ 'creates NaN or (+/-) infinity values even when '
182
+ 'trying new random parameters'
183
+ )
184
+ raise ValueError(message)
185
+ self.current_location = rand_gen.uniform(self.lower,
186
+ self.upper,
187
+ size=self.lower.size)
188
+ reinit_counter += 1
189
+ else:
190
+ init_error = False
191
+ # If first time reset, initialize ebest and xbest
192
+ if self.ebest is None and self.xbest is None:
193
+ self.ebest = self.current_energy
194
+ self.xbest = np.copy(self.current_location)
195
+ # Otherwise, we keep them in case of reannealing reset
196
+
197
+ def update_best(self, e, x, context):
198
+ self.ebest = e
199
+ self.xbest = np.copy(x)
200
+ if self.callback is not None:
201
+ val = self.callback(x, e, context)
202
+ if val is not None:
203
+ if val:
204
+ return ('Callback function requested to stop early by '
205
+ 'returning True')
206
+
207
+ def update_current(self, e, x):
208
+ self.current_energy = e
209
+ self.current_location = np.copy(x)
210
+
211
+
212
+ class StrategyChain:
213
+ """
214
+ Class that implements within a Markov chain the strategy for location
215
+ acceptance and local search decision making.
216
+
217
+ Parameters
218
+ ----------
219
+ acceptance_param : float
220
+ Parameter for acceptance distribution. It is used to control the
221
+ probability of acceptance. The lower the acceptance parameter, the
222
+ smaller the probability of acceptance. Default value is -5.0 with
223
+ a range (-1e4, -5].
224
+ visit_dist : VisitingDistribution
225
+ Instance of `VisitingDistribution` class.
226
+ func_wrapper : ObjectiveFunWrapper
227
+ Instance of `ObjectiveFunWrapper` class.
228
+ minimizer_wrapper: LocalSearchWrapper
229
+ Instance of `LocalSearchWrapper` class.
230
+ rand_gen : {None, int, `numpy.random.Generator`,
231
+ `numpy.random.RandomState`}, optional
232
+
233
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
234
+ singleton is used.
235
+ If `seed` is an int, a new ``RandomState`` instance is used,
236
+ seeded with `seed`.
237
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
238
+ that instance is used.
239
+ energy_state: EnergyState
240
+ Instance of `EnergyState` class.
241
+
242
+ """
243
+
244
+ def __init__(self, acceptance_param, visit_dist, func_wrapper,
245
+ minimizer_wrapper, rand_gen, energy_state):
246
+ # Local strategy chain minimum energy and location
247
+ self.emin = energy_state.current_energy
248
+ self.xmin = np.array(energy_state.current_location)
249
+ # Global optimizer state
250
+ self.energy_state = energy_state
251
+ # Acceptance parameter
252
+ self.acceptance_param = acceptance_param
253
+ # Visiting distribution instance
254
+ self.visit_dist = visit_dist
255
+ # Wrapper to objective function
256
+ self.func_wrapper = func_wrapper
257
+ # Wrapper to the local minimizer
258
+ self.minimizer_wrapper = minimizer_wrapper
259
+ self.not_improved_idx = 0
260
+ self.not_improved_max_idx = 1000
261
+ self._rand_gen = rand_gen
262
+ self.temperature_step = 0
263
+ self.K = 100 * len(energy_state.current_location)
264
+
265
+ def accept_reject(self, j, e, x_visit):
266
+ r = self._rand_gen.uniform()
267
+ pqv_temp = 1.0 - ((1.0 - self.acceptance_param) *
268
+ (e - self.energy_state.current_energy) / self.temperature_step)
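+ # pqv_temp = 1 - (1 - q_a) * beta * dE, with beta = 1 / temperature_step;
+ # exponentiation by 1 / (1 - q_a) below yields the acceptance
+ # probability p_qa from the module-level acceptance formula.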
269
+ if pqv_temp <= 0.:
270
+ pqv = 0.
271
+ else:
272
+ pqv = np.exp(np.log(pqv_temp) / (
273
+ 1. - self.acceptance_param))
274
+
275
+ if r <= pqv:
276
+ # We accept the new location and update state
277
+ self.energy_state.update_current(e, x_visit)
278
+ self.xmin = np.copy(self.energy_state.current_location)
279
+
280
+ # No improvement for a long time
281
+ if self.not_improved_idx >= self.not_improved_max_idx:
282
+ if j == 0 or self.energy_state.current_energy < self.emin:
283
+ self.emin = self.energy_state.current_energy
284
+ self.xmin = np.copy(self.energy_state.current_location)
285
+
286
+ def run(self, step, temperature):
287
+ self.temperature_step = temperature / float(step + 1)
288
+ self.not_improved_idx += 1
289
+ for j in range(self.energy_state.current_location.size * 2):
290
+ if j == 0:
291
+ if step == 0:
292
+ self.energy_state_improved = True
293
+ else:
294
+ self.energy_state_improved = False
295
+ x_visit = self.visit_dist.visiting(
296
+ self.energy_state.current_location, j, temperature)
297
+ # Calling the objective function
298
+ e = self.func_wrapper.fun(x_visit)
299
+ if e < self.energy_state.current_energy:
300
+ # We have got a better energy value
301
+ self.energy_state.update_current(e, x_visit)
302
+ if e < self.energy_state.ebest:
303
+ val = self.energy_state.update_best(e, x_visit, 0)
304
+ if val is not None:
305
+ if val:
306
+ return val
307
+ self.energy_state_improved = True
308
+ self.not_improved_idx = 0
309
+ else:
310
+ # We have not improved but do we accept the new location?
311
+ self.accept_reject(j, e, x_visit)
312
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
313
+ return ('Maximum number of function calls reached '
314
+ 'during annealing')
315
+ # End of StrategyChain loop
316
+
317
+ def local_search(self):
318
+ # Decision making for performing a local search
319
+ # based on strategy chain results.
320
+ # If the energy has improved, or there has been no improvement for
321
+ # too long, perform a local search from the best strategy chain location.
322
+ if self.energy_state_improved:
323
+ # Global energy has improved, let's see if LS improves further
324
+ e, x = self.minimizer_wrapper.local_search(self.energy_state.xbest,
325
+ self.energy_state.ebest)
326
+ if e < self.energy_state.ebest:
327
+ self.not_improved_idx = 0
328
+ val = self.energy_state.update_best(e, x, 1)
329
+ if val is not None:
330
+ if val:
331
+ return val
332
+ self.energy_state.update_current(e, x)
333
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
334
+ return ('Maximum number of function calls reached '
335
+ 'during local search')
336
+ # Check probability of a need to perform a LS even if no improvement
337
+ do_ls = False
338
+ if self.K < 90 * len(self.energy_state.current_location):
339
+ pls = np.exp(self.K * (
340
+ self.energy_state.ebest - self.energy_state.current_energy) /
341
+ self.temperature_step)
342
+ if pls >= self._rand_gen.uniform():
343
+ do_ls = True
344
+ # Global energy not improved, let's see what LS gives
345
+ # on the best strategy chain location
346
+ if self.not_improved_idx >= self.not_improved_max_idx:
347
+ do_ls = True
348
+ if do_ls:
349
+ e, x = self.minimizer_wrapper.local_search(self.xmin, self.emin)
350
+ self.xmin = np.copy(x)
351
+ self.emin = e
352
+ self.not_improved_idx = 0
353
+ self.not_improved_max_idx = self.energy_state.current_location.size
354
+ if e < self.energy_state.ebest:
355
+ val = self.energy_state.update_best(
356
+ self.emin, self.xmin, 2)
357
+ if val is not None:
358
+ if val:
359
+ return val
360
+ self.energy_state.update_current(e, x)
361
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
362
+ return ('Maximum number of function calls reached '
363
+ 'during dual annealing')
364
+
365
+
366
+ class ObjectiveFunWrapper:
367
+
368
+ def __init__(self, func, maxfun=1e7, *args):
369
+ self.func = func
370
+ self.args = args
371
+ # Number of objective function evaluations
372
+ self.nfev = 0
373
+ # Number of gradient function evaluations if used
374
+ self.ngev = 0
375
+ # Number of Hessian evaluations of the objective function if used
376
+ self.nhev = 0
377
+ self.maxfun = maxfun
378
+
379
+ def fun(self, x):
380
+ self.nfev += 1
381
+ return self.func(x, *self.args)
382
+
383
+
384
+ class LocalSearchWrapper:
385
+ """
386
+ Class used to wrap around the minimizer used for local search.
387
+ The default local minimizer is SciPy's L-BFGS-B minimizer.
388
+ """
389
+
390
+ LS_MAXITER_RATIO = 6
391
+ LS_MAXITER_MIN = 100
392
+ LS_MAXITER_MAX = 1000
393
+
394
+ def __init__(self, search_bounds, func_wrapper, *args, **kwargs):
395
+ self.func_wrapper = func_wrapper
396
+ self.kwargs = kwargs
397
+ self.jac = self.kwargs.get('jac', None)
398
+ self.hess = self.kwargs.get('hess', None)
399
+ self.hessp = self.kwargs.get('hessp', None)
400
+ self.kwargs.pop("args", None)
401
+ self.minimizer = minimize
402
+ bounds_list = list(zip(*search_bounds))
403
+ self.lower = np.array(bounds_list[0])
404
+ self.upper = np.array(bounds_list[1])
405
+
406
+ # If no minimizer specified, use SciPy minimize with 'L-BFGS-B' method
407
+ if not self.kwargs:
408
+ n = len(self.lower)
409
+ ls_max_iter = min(max(n * self.LS_MAXITER_RATIO,
410
+ self.LS_MAXITER_MIN),
411
+ self.LS_MAXITER_MAX)
412
+ self.kwargs['method'] = 'L-BFGS-B'
413
+ self.kwargs['options'] = {
414
+ 'maxiter': ls_max_iter,
415
+ }
416
+ self.kwargs['bounds'] = list(zip(self.lower, self.upper))
417
+ else:
418
+ if callable(self.jac):
419
+ def wrapped_jac(x):
420
+ return self.jac(x, *args)
421
+ self.kwargs['jac'] = wrapped_jac
422
+ if callable(self.hess):
423
+ def wrapped_hess(x):
424
+ return self.hess(x, *args)
425
+ self.kwargs['hess'] = wrapped_hess
426
+ if callable(self.hessp):
427
+ def wrapped_hessp(x, p):
428
+ return self.hessp(x, p, *args)
429
+ self.kwargs['hessp'] = wrapped_hessp
430
+
431
+ def local_search(self, x, e):
432
+ # Run local search from the given x location where energy value is e
433
+ x_tmp = np.copy(x)
434
+ mres = self.minimizer(self.func_wrapper.fun, x, **self.kwargs)
435
+ if 'njev' in mres:
436
+ self.func_wrapper.ngev += mres.njev
437
+ if 'nhev' in mres:
438
+ self.func_wrapper.nhev += mres.nhev
439
+ # Check if the result is a valid value
440
+ is_finite = np.all(np.isfinite(mres.x)) and np.isfinite(mres.fun)
441
+ in_bounds = np.all(mres.x >= self.lower) and np.all(
442
+ mres.x <= self.upper)
443
+ is_valid = is_finite and in_bounds
444
+
445
+ # Use the new point only if it is valid and returns a better result
446
+ if is_valid and mres.fun < e:
447
+ return mres.fun, mres.x
448
+ else:
449
+ return e, x_tmp
450
+
451
+
452
+ def dual_annealing(func, bounds, args=(), maxiter=1000,
453
+ minimizer_kwargs=None, initial_temp=5230.,
454
+ restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
455
+ maxfun=1e7, seed=None, no_local_search=False,
456
+ callback=None, x0=None):
457
+ """
458
+ Find the global minimum of a function using Dual Annealing.
459
+
460
+ Parameters
461
+ ----------
462
+ func : callable
463
+ The objective function to be minimized. Must be in the form
464
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
465
+ and ``args`` is a tuple of any additional fixed parameters needed to
466
+ completely specify the function.
467
+ bounds : sequence or `Bounds`
468
+ Bounds for variables. There are two ways to specify the bounds:
469
+
470
+ 1. Instance of `Bounds` class.
471
+ 2. Sequence of ``(min, max)`` pairs for each element in `x`.
472
+
473
+ args : tuple, optional
474
+ Any additional fixed parameters needed to completely specify the
475
+ objective function.
476
+ maxiter : int, optional
477
+ The maximum number of global search iterations. Default value is 1000.
478
+ minimizer_kwargs : dict, optional
479
+ Keyword arguments to be passed to the local minimizer
480
+ (`minimize`). An important option could be ``method`` for the minimizer
481
+ method to use.
482
+ If no keyword arguments are provided, the local minimizer defaults to
483
+ 'L-BFGS-B' and uses the already supplied bounds. If `minimizer_kwargs`
484
+ is specified, then the dict must contain all parameters required to
485
+ control the local minimization. `args` is ignored in this dict, as it is
486
+ passed automatically. `bounds` is not automatically passed on to the
487
+ local minimizer as the method may not support them.
488
+ initial_temp : float, optional
489
+ The initial temperature; use higher values to facilitate a wider
490
+ search of the energy landscape, allowing dual_annealing to escape
491
+ local minima that it is trapped in. Default value is 5230. Range is
492
+ (0.01, 5.e4].
493
+ restart_temp_ratio : float, optional
494
+ During the annealing process, the temperature decreases; when it
495
+ reaches ``initial_temp * restart_temp_ratio``, the reannealing process
496
+ is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
497
+ visit : float, optional
498
+ Parameter for visiting distribution. Default value is 2.62. Higher
499
+ values give the visiting distribution a heavier tail; this makes
500
+ the algorithm jump to more distant regions. The value range is (1, 3].
501
+ accept : float, optional
502
+ Parameter for acceptance distribution. It is used to control the
503
+ probability of acceptance. The lower the acceptance parameter, the
504
+ smaller the probability of acceptance. Default value is -5.0 with
505
+ a range (-1e4, -5].
506
+ maxfun : int, optional
507
+ Soft limit for the number of objective function calls. If the
508
+ algorithm is in the middle of a local search, this number will be
509
+ exceeded, and the algorithm will stop just after the local search is
510
+ done. Default value is 1e7.
511
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
512
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
513
+ singleton is used.
514
+ If `seed` is an int, a new ``RandomState`` instance is used,
515
+ seeded with `seed`.
516
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
517
+ that instance is used.
518
+ Specify `seed` for repeatable minimizations. The random numbers
519
+ generated with this seed only affect the visiting distribution function
520
+ and new coordinates generation.
521
+ no_local_search : bool, optional
522
+ If `no_local_search` is set to True, a traditional Generalized
523
+ Simulated Annealing will be performed with no local search
524
+ strategy applied.
525
+ callback : callable, optional
526
+ A callback function with signature ``callback(x, f, context)``,
527
+ which will be called for all minima found.
528
+ ``x`` and ``f`` are the coordinates and function value of the
529
+ latest minimum found, and ``context`` has value in [0, 1, 2], with the
530
+ following meaning:
531
+
532
+ - 0: minimum detected in the annealing process.
533
+ - 1: detection occurred in the local search process.
534
+ - 2: detection done in the dual annealing process.
535
+
536
+ If the callback implementation returns True, the algorithm will stop.
537
+ x0 : ndarray, shape(n,), optional
538
+ Coordinates of a single N-D starting point.
539
+
540
+ Returns
541
+ -------
542
+ res : OptimizeResult
543
+ The optimization result represented as a `OptimizeResult` object.
544
+ Important attributes are: ``x`` the solution array, ``fun`` the value
545
+ of the function at the solution, and ``message`` which describes the
546
+ cause of the termination.
547
+ See `OptimizeResult` for a description of other attributes.
548
+
549
+ Notes
550
+ -----
551
+ This function implements the Dual Annealing optimization. This stochastic
552
+ approach derived from [3]_ combines the generalization of CSA (Classical
553
+ Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
554
+ to a strategy for applying a local search on accepted locations [4]_.
555
+ An alternative implementation of this same algorithm is described in [5]_
556
+ and benchmarks are presented in [6]_. This approach introduces an advanced
557
+ method to refine the solution found by the generalized annealing
558
+ process. This algorithm uses a distorted Cauchy-Lorentz visiting
559
+ distribution, with its shape controlled by the parameter :math:`q_{v}`
560
+
561
+ .. math::
562
+
563
+ g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
564
+ \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
565
+ \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
566
+ \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
567
+ \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}
568
+
569
+ Where :math:`t` is the artificial time. This visiting distribution is used
570
+ to generate a trial jump distance :math:`\\Delta x(t)` of variable
571
+ :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.
572
+
573
+ From the starting point, after calling the visiting distribution
574
+ function, the acceptance probability is computed as follows:
575
+
576
+ .. math::
577
+
578
+ p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
579
+ \\frac{1}{1-q_{a}}}\\}}
580
+
581
+ Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
582
+ acceptance probability is assigned to the cases where
583
+
584
+ .. math::
585
+
586
+ [1-(1-q_{a}) \\beta \\Delta E] < 0
587
+
588
+ The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to
589
+
590
+ .. math::
591
+
592
+ T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
593
+ 1 + t\\right)^{q_{v}-1}-1}
594
+
595
+ Where :math:`q_{v}` is the visiting parameter.
596
+
597
+ .. versionadded:: 1.2.0
598
+
599
+ References
600
+ ----------
601
+ .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
602
+ statistics. Journal of Statistical Physics, 52, 479-487 (1998).
603
+ .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
604
+ Physica A, 233, 395-406 (1996).
605
+ .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
606
+ Annealing Algorithm and Its Application to the Thomson Model.
607
+ Physics Letters A, 233, 216-220 (1997).
608
+ .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
609
+ Annealing. Physical Review E, 62, 4473 (2000).
610
+ .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
611
+ Simulated Annealing for Efficient Global Optimization: the GenSA
612
+ Package for R. The R Journal, Volume 5/1 (2013).
613
+ .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
614
+ Statistical Software, 60(6), 1 - 45, (2014).
615
+ :doi:`10.18637/jss.v060.i06`
616
+
617
+ Examples
618
+ --------
619
+ The following example is a 10-D problem, with many local minima.
620
+ The function involved is called Rastrigin
621
+ (https://en.wikipedia.org/wiki/Rastrigin_function)
622
+
623
+ >>> import numpy as np
624
+ >>> from scipy.optimize import dual_annealing
625
+ >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
626
+ >>> lw = [-5.12] * 10
627
+ >>> up = [5.12] * 10
628
+ >>> ret = dual_annealing(func, bounds=list(zip(lw, up)))
629
+ >>> ret.x
630
+ array([-4.26437714e-09, -3.91699361e-09, -1.86149218e-09, -3.97165720e-09,
631
+ -6.29151648e-09, -6.53145322e-09, -3.93616815e-09, -6.55623025e-09,
632
+ -6.05775280e-09, -5.00668935e-09]) # random
633
+ >>> ret.fun
634
+ 0.000000
635
+
636
+ """
637
+
638
+ if isinstance(bounds, Bounds):
639
+ bounds = new_bounds_to_old(bounds.lb, bounds.ub, len(bounds.lb))
640
+
641
+ if x0 is not None and len(x0) != len(bounds):
642
+ raise ValueError('Bounds size does not match x0')
643
+
644
+ lu = list(zip(*bounds))
645
+ lower = np.array(lu[0])
646
+ upper = np.array(lu[1])
647
+ # Check that restart temperature ratio is correct
648
+ if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
649
+ raise ValueError('Restart temperature ratio has to be in range (0, 1)')
650
+ # Checking bounds are valid
651
+ if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
652
+ np.isnan(lower)) or np.any(np.isnan(upper))):
653
+ raise ValueError('Some bounds values are inf values or nan values')
654
+ # Checking that bounds are consistent
655
+ if not np.all(lower < upper):
656
+ raise ValueError('Bounds are not consistent min < max')
657
+ # Checking that bounds are the same length
658
+ if len(lower) != len(upper):
659
+ raise ValueError('Bounds do not have the same dimensions')
660
+
661
+ # Wrapper for the objective function
662
+ func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
663
+
664
+ # minimizer_kwargs has to be a dict, not None
665
+ minimizer_kwargs = minimizer_kwargs or {}
666
+
667
+ minimizer_wrapper = LocalSearchWrapper(
668
+ bounds, func_wrapper, *args, **minimizer_kwargs)
669
+
670
+ # Initialization of random Generator for reproducible runs if seed provided
671
+ rand_state = check_random_state(seed)
672
+ # Initialization of the energy state
673
+ energy_state = EnergyState(lower, upper, callback)
674
+ energy_state.reset(func_wrapper, rand_state, x0)
675
+ # Minimum value of annealing temperature reached to perform
676
+ # re-annealing
677
+ temperature_restart = initial_temp * restart_temp_ratio
678
+ # VisitingDistribution instance
679
+ visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
680
+ # Strategy chain instance
681
+ strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
682
+ minimizer_wrapper, rand_state, energy_state)
683
+ need_to_stop = False
684
+ iteration = 0
685
+ message = []
686
+ # OptimizeResult object to be returned
687
+ optimize_res = OptimizeResult()
688
+ optimize_res.success = True
689
+ optimize_res.status = 0
690
+
691
+ t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
692
+ # Run the search loop
693
+ while not need_to_stop:
694
+ for i in range(maxiter):
695
+ # Compute temperature for this step
696
+ s = float(i) + 2.0
697
+ t2 = np.exp((visit - 1) * np.log(s)) - 1.0
698
+ temperature = initial_temp * t1 / t2
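+ # Matches the docstring schedule
+ # T(t) = T(1) * (2**(qv - 1) - 1) / ((1 + t)**(qv - 1) - 1)
+ # with qv = visit and t = i + 1 (so s = 1 + t).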
699
+ if iteration >= maxiter:
700
+ message.append("Maximum number of iteration reached")
701
+ need_to_stop = True
702
+ break
703
+ # Need a re-annealing process?
704
+ if temperature < temperature_restart:
705
+ energy_state.reset(func_wrapper, rand_state)
706
+ break
707
+ # starting strategy chain
708
+ val = strategy_chain.run(i, temperature)
709
+ if val is not None:
710
+ message.append(val)
711
+ need_to_stop = True
712
+ optimize_res.success = False
713
+ break
714
+ # Possible local search at the end of the strategy chain
715
+ if not no_local_search:
716
+ val = strategy_chain.local_search()
717
+ if val is not None:
718
+ message.append(val)
719
+ need_to_stop = True
720
+ optimize_res.success = False
721
+ break
722
+ iteration += 1
723
+
724
+ # Setting the OptimizeResult values
725
+ optimize_res.x = energy_state.xbest
726
+ optimize_res.fun = energy_state.ebest
727
+ optimize_res.nit = iteration
728
+ optimize_res.nfev = func_wrapper.nfev
729
+ optimize_res.njev = func_wrapper.ngev
730
+ optimize_res.nhev = func_wrapper.nhev
731
+ optimize_res.message = message
732
+ return optimize_res
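A short sketch (illustrative argument values) of reproducible runs via the documented ``seed`` argument; the seeded generator drives the visiting distribution and new coordinate generation, and the default local minimizer (L-BFGS-B) is deterministic:
>>> import numpy as np
>>> from scipy.optimize import dual_annealing
>>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
>>> bounds = list(zip([-5.12] * 4, [5.12] * 4))
>>> a = dual_annealing(func, bounds, seed=1234, maxiter=100)
>>> b = dual_annealing(func, bounds, seed=1234, maxiter=100)
>>> np.allclose(a.x, b.x)
True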
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so ADDED
Binary file (99.8 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py ADDED
@@ -0,0 +1,475 @@
1
+ """Hessian update strategies for quasi-Newton optimization methods."""
2
+ import numpy as np
3
+ from numpy.linalg import norm
4
+ from scipy.linalg import get_blas_funcs, issymmetric
5
+ from warnings import warn
6
+
7
+
8
+ __all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
9
+
10
+
11
+ class HessianUpdateStrategy:
12
+ """Interface for implementing Hessian update strategies.
13
+
14
+ Many optimization methods make use of Hessian (or inverse Hessian)
15
+ approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
16
+ Some of these approximations, however, do not actually need to store
17
+ the entire matrix or can compute the internal matrix product with a
18
+ given vector in a very efficiently manner. This class serves as an
19
+ abstract interface between the optimization algorithm and the
20
+ quasi-Newton update strategies, giving freedom of implementation
21
+ to store and update the internal matrix as efficiently as possible.
22
+ Different choices of initialization and update procedure will result
23
+ in different quasi-Newton strategies.
24
+
25
+ Four methods should be implemented in derived classes: ``initialize``,
26
+ ``update``, ``dot`` and ``get_matrix``.
27
+
28
+ Notes
29
+ -----
30
+ Any instance of a class that implements this interface,
31
+ can be accepted by the method ``minimize`` and used by
32
+ the compatible solvers to approximate the Hessian (or
33
+ inverse Hessian) used by the optimization algorithms.
34
+ """
35
+
36
+ def initialize(self, n, approx_type):
37
+ """Initialize internal matrix.
38
+
39
+ Allocate internal memory for storing and updating
40
+ the Hessian or its inverse.
41
+
42
+ Parameters
43
+ ----------
44
+ n : int
45
+ Problem dimension.
46
+ approx_type : {'hess', 'inv_hess'}
47
+ Selects either the Hessian or the inverse Hessian.
48
+ When set to 'hess' the Hessian will be stored and updated.
49
+ When set to 'inv_hess' its inverse will be used instead.
50
+ """
51
+ raise NotImplementedError("The method ``initialize(n, approx_type)``"
52
+ " is not implemented.")
53
+
54
+ def update(self, delta_x, delta_grad):
55
+ """Update internal matrix.
56
+
57
+ Update Hessian matrix or its inverse (depending on how 'approx_type'
58
+ is defined) using information about the last evaluated points.
59
+
60
+ Parameters
61
+ ----------
62
+ delta_x : ndarray
63
+ The difference between two points the gradient
64
+ function have been evaluated at: ``delta_x = x2 - x1``.
65
+ delta_grad : ndarray
66
+ The difference between the gradients:
67
+ ``delta_grad = grad(x2) - grad(x1)``.
68
+ """
69
+ raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
70
+ " is not implemented.")
71
+
72
+ def dot(self, p):
73
+ """Compute the product of the internal matrix with the given vector.
74
+
75
+ Parameters
76
+ ----------
77
+ p : array_like
78
+ 1-D array representing a vector.
79
+
80
+ Returns
81
+ -------
82
+ Hp : array
83
+ 1-D represents the result of multiplying the approximation matrix
84
+ by vector p.
85
+ """
86
+ raise NotImplementedError("The method ``dot(p)``"
87
+ " is not implemented.")
88
+
89
+ def get_matrix(self):
90
+ """Return current internal matrix.
91
+
92
+ Returns
93
+ -------
94
+ H : ndarray, shape (n, n)
95
+ Dense matrix containing either the Hessian
96
+ or its inverse (depending on how 'approx_type'
97
+ is defined).
98
+ """
99
+ raise NotImplementedError("The method ``get_matrix(p)``"
100
+ " is not implemented.")
101
+
102
+
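To make the four-method contract above concrete, here is a minimal sketch (not part of scipy; ``ScaledIdentity`` is a hypothetical example) of a strategy that keeps a fixed scaled-identity approximation:
import numpy as np
from scipy.optimize import HessianUpdateStrategy

class ScaledIdentity(HessianUpdateStrategy):
    """Hypothetical fixed ``scale * I`` approximation (never updated)."""
    def __init__(self, scale=1.0):
        self.scale = scale
    def initialize(self, n, approx_type):
        # Store the problem size; the matrix itself stays implicit.
        self.n = n
        self.approx_type = approx_type
    def update(self, delta_x, delta_grad):
        pass  # deliberately keep the fixed scaling
    def dot(self, p):
        return self.scale * np.asarray(p)
    def get_matrix(self):
        return self.scale * np.eye(self.n)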
103
+ class FullHessianUpdateStrategy(HessianUpdateStrategy):
104
+ """Hessian update strategy with full dimensional internal representation.
105
+ """
106
+ _syr = get_blas_funcs('syr', dtype='d') # Symmetric rank 1 update
107
+ _syr2 = get_blas_funcs('syr2', dtype='d') # Symmetric rank 2 update
108
+ # Symmetric matrix-vector product
109
+ _symv = get_blas_funcs('symv', dtype='d')
110
+
111
+ def __init__(self, init_scale='auto'):
112
+ self.init_scale = init_scale
113
+ # Until initialize is called we can't really use the class,
114
+ # so it makes sense to set everything to None.
115
+ self.first_iteration = None
116
+ self.approx_type = None
117
+ self.B = None
118
+ self.H = None
119
+
120
+ def initialize(self, n, approx_type):
121
+ """Initialize internal matrix.
122
+
123
+ Allocate internal memory for storing and updating
124
+ the Hessian or its inverse.
125
+
126
+ Parameters
127
+ ----------
128
+ n : int
129
+ Problem dimension.
130
+ approx_type : {'hess', 'inv_hess'}
131
+ Selects either the Hessian or the inverse Hessian.
132
+ When set to 'hess' the Hessian will be stored and updated.
133
+ When set to 'inv_hess' its inverse will be used instead.
134
+ """
135
+ self.first_iteration = True
136
+ self.n = n
137
+ self.approx_type = approx_type
138
+ if approx_type not in ('hess', 'inv_hess'):
139
+ raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
140
+ # Create matrix
141
+ if self.approx_type == 'hess':
142
+ self.B = np.eye(n, dtype=float)
143
+ else:
144
+ self.H = np.eye(n, dtype=float)
145
+
146
+ def _auto_scale(self, delta_x, delta_grad):
147
+ # Heuristic to scale matrix at first iteration.
148
+ # Described in Nocedal and Wright "Numerical Optimization"
149
+ # p.143 formula (6.20).
150
+ s_norm2 = np.dot(delta_x, delta_x)
151
+ y_norm2 = np.dot(delta_grad, delta_grad)
152
+ ys = np.abs(np.dot(delta_grad, delta_x))
153
+ if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
154
+ return 1
155
+ if self.approx_type == 'hess':
156
+ return y_norm2 / ys
157
+ else:
158
+ return ys / y_norm2
159
+
160
+ def _update_implementation(self, delta_x, delta_grad):
161
+ raise NotImplementedError("The method ``_update_implementation``"
162
+ " is not implemented.")
163
+
164
+ def update(self, delta_x, delta_grad):
165
+ """Update internal matrix.
166
+
167
+ Update Hessian matrix or its inverse (depending on how 'approx_type'
168
+ is defined) using information about the last evaluated points.
169
+
170
+ Parameters
171
+ ----------
172
+ delta_x : ndarray
173
+ The difference between two points the gradient
174
+ function have been evaluated at: ``delta_x = x2 - x1``.
175
+ delta_grad : ndarray
176
+ The difference between the gradients:
177
+ ``delta_grad = grad(x2) - grad(x1)``.
178
+ """
179
+ if np.all(delta_x == 0.0):
180
+ return
181
+ if np.all(delta_grad == 0.0):
182
+ warn('delta_grad == 0.0. Check if the approximated '
183
+ 'function is linear. If the function is linear '
184
+ 'better results can be obtained by defining the '
185
+ 'Hessian as zero instead of using quasi-Newton '
186
+ 'approximations.',
187
+ UserWarning, stacklevel=2)
188
+ return
189
+ if self.first_iteration:
190
+ # Get user specific scale
191
+ if isinstance(self.init_scale, str) and self.init_scale == "auto":
192
+ scale = self._auto_scale(delta_x, delta_grad)
193
+ else:
194
+ scale = self.init_scale
195
+
196
+ # Check for complex: numpy will silently cast a complex array to
197
+ # a real one but not so for scalar as it raises a TypeError.
198
+ # Checking here brings a consistent behavior.
199
+ replace = False
200
+ if np.size(scale) == 1:
201
+ # to account for the legacy behavior having the exact same cast
202
+ scale = float(scale)
203
+ elif np.iscomplexobj(scale):
204
+ raise TypeError("init_scale contains complex elements, "
205
+ "must be real.")
206
+ else: # test explicitly for allowed shapes and values
207
+ replace = True
208
+ if self.approx_type == 'hess':
209
+ shape = np.shape(self.B)
210
+ dtype = self.B.dtype
211
+ else:
212
+ shape = np.shape(self.H)
213
+ dtype = self.H.dtype
214
+ # copy, will replace the original
215
+ scale = np.array(scale, dtype=dtype, copy=True)
216
+
217
+ # it has to match the shape of the matrix for the multiplication,
218
+ # no implicit broadcasting is allowed
219
+ if shape != (init_shape := np.shape(scale)):
220
+ raise ValueError("If init_scale is an array, it must have the "
221
+ f"dimensions of the hess/inv_hess: {shape}."
222
+ f" Got {init_shape}.")
223
+ if not issymmetric(scale):
224
+ raise ValueError("If init_scale is an array, it must be"
225
+ " symmetric (passing scipy.linalg.issymmetric)"
226
+ " to be an approximation of a hess/inv_hess.")
227
+
228
+ # Scale initial matrix with ``scale * np.eye(n)`` or replace
229
+ # This is not ideal, we could assign the scale directly in
230
+ # initialize, but we would need to
231
+ if self.approx_type == 'hess':
232
+ if replace:
233
+ self.B = scale
234
+ else:
235
+ self.B *= scale
236
+ else:
237
+ if replace:
238
+ self.H = scale
239
+ else:
240
+ self.H *= scale
241
+ self.first_iteration = False
242
+ self._update_implementation(delta_x, delta_grad)
243
+
244
+ def dot(self, p):
245
+ """Compute the product of the internal matrix with the given vector.
246
+
247
+ Parameters
248
+ ----------
249
+ p : array_like
250
+ 1-D array representing a vector.
251
+
252
+ Returns
253
+ -------
254
+ Hp : array
255
+ 1-D represents the result of multiplying the approximation matrix
256
+ by vector p.
257
+ """
258
+ if self.approx_type == 'hess':
259
+ return self._symv(1, self.B, p)
260
+ else:
261
+ return self._symv(1, self.H, p)
262
+
263
+ def get_matrix(self):
264
+ """Return the current internal matrix.
265
+
266
+ Returns
267
+ -------
268
+ M : ndarray, shape (n, n)
269
+ Dense matrix containing either the Hessian or its inverse
270
+ (depending on how `approx_type` was defined).
271
+ """
272
+ if self.approx_type == 'hess':
273
+ M = np.copy(self.B)
274
+ else:
275
+ M = np.copy(self.H)
276
+ li = np.tril_indices_from(M, k=-1)
277
+ M[li] = M.T[li]
278
+ return M
279
+
280
+
281
+ class BFGS(FullHessianUpdateStrategy):
282
+ """Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
283
+
284
+ Parameters
285
+ ----------
286
+ exception_strategy : {'skip_update', 'damp_update'}, optional
287
+ Define how to proceed when the curvature condition is violated.
288
+ Set it to 'skip_update' to just skip the update. Or, alternatively,
289
+ set it to 'damp_update' to interpolate between the actual BFGS
290
+ result and the unmodified matrix. Both exceptions strategies
291
+ are explained in [1]_, p.536-537.
292
+ min_curvature : float
293
+ This number, scaled by a normalization factor, defines the
294
+ minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
295
+ unaffected by the exception strategy. By default is equal to
296
+ 1e-8 when ``exception_strategy = 'skip_update'`` and equal
297
+ to 0.2 when ``exception_strategy = 'damp_update'``.
298
+ init_scale : {float, np.array, 'auto'}
299
+ This parameter can be used to initialize the Hessian or its
300
+ inverse. When a float is given, the relevant array is initialized
301
+ to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
302
+ Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
303
+ this array will be used. Otherwise an error is generated.
304
+ Set it to 'auto' in order to use an automatic heuristic for choosing
305
+ the initial scale. The heuristic is described in [1]_, p.143.
306
+ The default is 'auto'.
307
+
308
+ Notes
309
+ -----
310
+ The update is based on the description in [1]_, p.140.
311
+
312
+ References
313
+ ----------
314
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
315
+ Second Edition (2006).
316
+ """
317
+
318
+ def __init__(self, exception_strategy='skip_update', min_curvature=None,
319
+ init_scale='auto'):
320
+ if exception_strategy == 'skip_update':
321
+ if min_curvature is not None:
322
+ self.min_curvature = min_curvature
323
+ else:
324
+ self.min_curvature = 1e-8
325
+ elif exception_strategy == 'damp_update':
326
+ if min_curvature is not None:
327
+ self.min_curvature = min_curvature
328
+ else:
329
+ self.min_curvature = 0.2
330
+ else:
331
+ raise ValueError("`exception_strategy` must be 'skip_update' "
332
+ "or 'damp_update'.")
333
+
334
+ super().__init__(init_scale)
335
+ self.exception_strategy = exception_strategy
336
+
337
+ def _update_inverse_hessian(self, ys, Hy, yHy, s):
338
+ """Update the inverse Hessian matrix.
339
+
340
+ BFGS update using the formula:
341
+
342
+ ``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
343
+ - 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``
344
+
345
+ where ``s = delta_x`` and ``y = delta_grad``. This formula is
346
+ equivalent to (6.17) in [1]_ written in a more efficient way
347
+ for implementation.
348
+
349
+ References
350
+ ----------
351
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
352
+ Second Edition (2006).
353
+ """
354
+ self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
355
+ self.H = self._syr((ys + yHy) / ys ** 2, s, a=self.H)
356
+
357
+ def _update_hessian(self, ys, Bs, sBs, y):
358
+ """Update the Hessian matrix.
359
+
360
+ BFGS update using the formula:
361
+
362
+ ``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``
363
+
364
+ where ``s`` is short for ``delta_x`` and ``y`` is short
365
+ for ``delta_grad``. Formula (6.19) in [1]_.
366
+
367
+ References
368
+ ----------
369
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
370
+ Second Edition (2006).
371
+ """
372
+ self.B = self._syr(1.0 / ys, y, a=self.B)
373
+ self.B = self._syr(-1.0 / sBs, Bs, a=self.B)
374
+
375
+ def _update_implementation(self, delta_x, delta_grad):
376
+ # Auxiliary variables w and z
377
+ if self.approx_type == 'hess':
378
+ w = delta_x
379
+ z = delta_grad
380
+ else:
381
+ w = delta_grad
382
+ z = delta_x
383
+ # Do some common operations
384
+ wz = np.dot(w, z)
385
+ Mw = self.dot(w)
386
+ wMw = Mw.dot(w)
387
+ # Guarantee that wMw > 0 by reinitializing matrix.
388
+ # While this is always true in exact arithmetic,
389
+ # indefinite matrix may appear due to roundoff errors.
390
+ if wMw <= 0.0:
391
+ scale = self._auto_scale(delta_x, delta_grad)
392
+ # Reinitialize matrix
393
+ if self.approx_type == 'hess':
394
+ self.B = scale * np.eye(self.n, dtype=float)
395
+ else:
396
+ self.H = scale * np.eye(self.n, dtype=float)
397
+ # Do common operations for new matrix
398
+ Mw = self.dot(w)
399
+ wMw = Mw.dot(w)
400
+ # Check if curvature condition is violated
401
+ if wz <= self.min_curvature * wMw:
402
+ # If the option 'skip_update' is set
403
+ # we just skip the update when the condition
404
+ # is violated.
405
+ if self.exception_strategy == 'skip_update':
406
+ return
407
+ # If the option 'damp_update' is set we
408
+ # interpolate between the actual BFGS
409
+ # result and the unmodified matrix.
410
+ elif self.exception_strategy == 'damp_update':
411
+ update_factor = (1-self.min_curvature) / (1 - wz/wMw)
412
+ z = update_factor*z + (1-update_factor)*Mw
413
+ wz = np.dot(w, z)
414
+ # Update matrix
415
+ if self.approx_type == 'hess':
416
+ self._update_hessian(wz, Mw, wMw, z)
417
+ else:
418
+ self._update_inverse_hessian(wz, Mw, wMw, z)
419
+
420
+
421
+ class SR1(FullHessianUpdateStrategy):
422
+ """Symmetric-rank-1 Hessian update strategy.
423
+
424
+ Parameters
425
+ ----------
426
+ min_denominator : float
427
+ This number, scaled by a normalization factor,
428
+ defines the minimum denominator magnitude allowed
429
+ in the update. When the condition is violated we skip
430
+ the update. By default uses ``1e-8``.
431
+ init_scale : {float, np.array, 'auto'}, optional
432
+ This parameter can be used to initialize the Hessian or its
433
+ inverse. When a float is given, the relevant array is initialized
434
+ to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
435
+ Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
436
+ this array will be used. Otherwise an error is generated.
437
+ Set it to 'auto' in order to use an automatic heuristic for choosing
438
+ the initial scale. The heuristic is described in [1]_, p.143.
439
+ The default is 'auto'.
440
+
441
+ Notes
442
+ -----
443
+ The update is based on the description in [1]_, p.144-146.
444
+
445
+ References
446
+ ----------
447
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
448
+ Second Edition (2006).
449
+ """
450
+
451
+ def __init__(self, min_denominator=1e-8, init_scale='auto'):
452
+ self.min_denominator = min_denominator
453
+ super().__init__(init_scale)
454
+
455
+ def _update_implementation(self, delta_x, delta_grad):
456
+ # Auxiliary variables w and z
457
+ if self.approx_type == 'hess':
458
+ w = delta_x
459
+ z = delta_grad
460
+ else:
461
+ w = delta_grad
462
+ z = delta_x
463
+ # Do some common operations
464
+ Mw = self.dot(w)
465
+ z_minus_Mw = z - Mw
466
+ denominator = np.dot(w, z_minus_Mw)
467
+ # If the denominator is too small
468
+ # we just skip the update.
469
+ if np.abs(denominator) <= self.min_denominator*norm(w)*norm(z_minus_Mw):
470
+ return
471
+ # Update matrix
472
+ if self.approx_type == 'hess':
473
+ self.B = self._syr(1/denominator, z_minus_Mw, a=self.B)
474
+ else:
475
+ self.H = self._syr(1/denominator, z_minus_Mw, a=self.H)
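A short usage sketch (illustrative starting point): instances of these strategies can be passed as the ``hess`` argument of ``minimize`` with a compatible method such as 'trust-constr':
>>> from scipy.optimize import minimize, rosen, rosen_der, SR1
>>> res = minimize(rosen, x0=[1.3, 0.7], method='trust-constr',
...                jac=rosen_der, hess=SR1())
>>> res.x  # doctest: +SKIP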
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py ADDED
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+
4
+ import numpy as np
5
+
6
+ from ._optimize import OptimizeResult
7
+ from ._pava_pybind import pava
8
+
9
+ if TYPE_CHECKING:
10
+ import numpy.typing as npt
11
+
12
+
13
+ __all__ = ["isotonic_regression"]
14
+
15
+
16
+ def isotonic_regression(
17
+ y: npt.ArrayLike,
18
+ *,
19
+ weights: npt.ArrayLike | None = None,
20
+ increasing: bool = True,
21
+ ) -> OptimizeResult:
22
+ r"""Nonparametric isotonic regression.
23
+
24
+ A (not strictly) monotonically increasing array `x` with the same length
25
+ as `y` is calculated by the pool adjacent violators algorithm (PAVA), see
26
+ [1]_. See the Notes section for more details.
27
+
28
+ Parameters
29
+ ----------
30
+ y : (N,) array_like
31
+ Response variable.
32
+ weights : (N,) array_like or None
33
+ Case weights.
34
+ increasing : bool
35
+ If True, fit monotonic increasing, i.e. isotonic, regression.
36
+ If False, fit a monotonic decreasing, i.e. antitonic, regression.
37
+ Default is True.
38
+
39
+ Returns
40
+ -------
41
+ res : OptimizeResult
42
+ The optimization result represented as a ``OptimizeResult`` object.
43
+ Important attributes are:
44
+
45
+ - ``x``: The isotonic regression solution, i.e. an increasing (or
46
+ decreasing) array of the same length as y, with elements in the
47
+ range from min(y) to max(y).
48
+ - ``weights`` : Array with the sum of case weights for each block
49
+ (or pool) B.
50
+ - ``blocks``: Array of length B+1 with the indices of the start
51
+ positions of each block (or pool) B. The j-th block is given by
52
+ ``x[blocks[j]:blocks[j+1]]`` for which all values are the same.
53
+
54
+ Notes
55
+ -----
56
+ Given data :math:`y` and case weights :math:`w`, the isotonic regression
57
+ solves the following optimization problem:
58
+
59
+ .. math::
60
+
61
+ \operatorname{argmin}_{x_i} \sum_i w_i (y_i - x_i)^2 \quad
62
+ \text{subject to } x_i \leq x_j \text{ whenever } i \leq j \,.
63
+
64
+ For every input value :math:`y_i`, it generates a value :math:`x_i` such
65
+ that :math:`x` is increasing (but not strictly), i.e.
66
+ :math:`x_i \leq x_{i+1}`. This is accomplished by the PAVA.
67
+ The solution consists of pools or blocks, i.e. neighboring elements of
68
+ :math:`x`, e.g. :math:`x_i` and :math:`x_{i+1}`, that all have the same
69
+ value.
70
+
71
+ Most interestingly, the solution stays the same if the squared loss is
72
+ replaced by the wide class of Bregman functions which are the unique
73
+ class of strictly consistent scoring functions for the mean, see [2]_
74
+ and references therein.
75
+
76
+ The implemented version of PAVA according to [1]_ has a computational
77
+ complexity of O(N) with input size N.
78
+
79
+ References
80
+ ----------
81
+ .. [1] Busing, F. M. T. A. (2022).
82
+ Monotone Regression: A Simple and Fast O(n) PAVA Implementation.
83
+ Journal of Statistical Software, Code Snippets, 102(1), 1-25.
84
+ :doi:`10.18637/jss.v102.c01`
85
+ .. [2] Jordan, A.I., Mühlemann, A. & Ziegel, J.F.
86
+ Characterizing the optimal solutions to the isotonic regression
87
+ problem for identifiable functionals.
88
+ Ann Inst Stat Math 74, 489-514 (2022).
89
+ :doi:`10.1007/s10463-021-00808-0`
90
+
91
+ Examples
92
+ --------
93
+ This example demonstrates that ``isotonic_regression`` really solves a
94
+ constrained optimization problem.
95
+
96
+ >>> import numpy as np
97
+ >>> from scipy.optimize import isotonic_regression, minimize
98
+ >>> y = [1.5, 1.0, 4.0, 6.0, 5.7, 5.0, 7.8, 9.0, 7.5, 9.5, 9.0]
99
+ >>> def objective(yhat, y):
100
+ ... return np.sum((yhat - y)**2)
101
+ >>> def constraint(yhat, y):
102
+ ... # This is for a monotonically increasing regression.
103
+ ... return np.diff(yhat)
104
+ >>> result = minimize(objective, x0=y, args=(y,),
105
+ ... constraints=[{'type': 'ineq',
106
+ ... 'fun': lambda x: constraint(x, y)}])
107
+ >>> result.x
108
+ array([1.25 , 1.25 , 4. , 5.56666667, 5.56666667,
109
+ 5.56666667, 7.8 , 8.25 , 8.25 , 9.25 ,
110
+ 9.25 ])
111
+ >>> result = isotonic_regression(y)
112
+ >>> result.x
113
+ array([1.25 , 1.25 , 4. , 5.56666667, 5.56666667,
114
+ 5.56666667, 7.8 , 8.25 , 8.25 , 9.25 ,
115
+ 9.25 ])
116
+
117
+ The big advantage of ``isotonic_regression`` compared to calling
118
+ ``minimize`` is that it is more user-friendly, i.e. one does not need to
119
+ define objective and constraint functions, and that it is orders of
120
+ magnitude faster. On commodity hardware (in 2023), for normally distributed
121
+ input y of length 1000, the minimizer takes about 4 seconds, while
122
+ ``isotonic_regression`` takes about 200 microseconds.
123
+ """
124
+ yarr = np.atleast_1d(y) # Check yarr.ndim == 1 is implicit (pybind11) in pava.
125
+ order = slice(None) if increasing else slice(None, None, -1)
126
+ x = np.array(yarr[order], order="C", dtype=np.float64, copy=True)
127
+ if weights is None:
128
+ wx = np.ones_like(yarr, dtype=np.float64)
129
+ else:
130
+ warr = np.atleast_1d(weights)
131
+
132
+ if not (yarr.ndim == warr.ndim == 1 and yarr.shape[0] == warr.shape[0]):
133
+ raise ValueError(
134
+ "Input arrays y and w must have one dimension of equal length."
135
+ )
136
+ if np.any(warr <= 0):
137
+ raise ValueError("Weights w must be strictly positive.")
138
+
139
+ wx = np.array(warr[order], order="C", dtype=np.float64, copy=True)
140
+ n = x.shape[0]
141
+ r = np.full(shape=n + 1, fill_value=-1, dtype=np.intp)
142
+ x, wx, r, b = pava(x, wx, r)
143
+ # Now that we know the number of blocks b, we only keep the relevant part
144
+ # of r and wx.
145
+ # As information: Due to the pava implementation, after the last block
146
+ # index, there might be smaller numbers appended to r, e.g.
147
+ # r = [0, 10, 8, 7] which in the end should be r = [0, 10].
148
+ r = r[:b + 1]
149
+ wx = wx[:b]
150
+ if not increasing:
151
+ x = x[::-1]
152
+ wx = wx[::-1]
153
+ r = r[-1] - r[::-1]
154
+ return OptimizeResult(
155
+ x=x,
156
+ weights=wx,
157
+ blocks=r,
158
+ )
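A short sketch (illustrative data) of an antitonic fit and of reading the block representation returned above:
>>> import numpy as np
>>> from scipy.optimize import isotonic_regression
>>> res = isotonic_regression([3.0, 1.0, 2.0, 4.0], increasing=False)
>>> # block j spans res.x[res.blocks[j]:res.blocks[j+1]], one pooled value each
>>> np.diff(res.blocks).sum() == res.x.shape[0]
True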
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py ADDED
@@ -0,0 +1,543 @@
+ """
+ Functions
+ ---------
+ .. autosummary::
+    :toctree: generated/
+
+    fmin_l_bfgs_b
+
+ """
+
+ ## License for the Python wrapper
+ ## ==============================
+
+ ## Copyright (c) 2004 David M. Cooke <[email protected]>
+
+ ## Permission is hereby granted, free of charge, to any person obtaining a
+ ## copy of this software and associated documentation files (the "Software"),
+ ## to deal in the Software without restriction, including without limitation
+ ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ ## and/or sell copies of the Software, and to permit persons to whom the
+ ## Software is furnished to do so, subject to the following conditions:
+
+ ## The above copyright notice and this permission notice shall be included in
+ ## all copies or substantial portions of the Software.
+
+ ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ ## DEALINGS IN THE SOFTWARE.
+
+ ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
+
+ import numpy as np
+ from numpy import array, asarray, float64, zeros
+ from . import _lbfgsb
+ from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
+                         _wrap_callback, _check_unknown_options,
+                         _prepare_scalar_function)
+ from ._constraints import old_bound_to_new
+
+ from scipy.sparse.linalg import LinearOperator
+
+ __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
+
+
+ def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
+                   approx_grad=0,
+                   bounds=None, m=10, factr=1e7, pgtol=1e-5,
+                   epsilon=1e-8,
+                   iprint=-1, maxfun=15000, maxiter=15000, disp=None,
+                   callback=None, maxls=20):
+     """
+     Minimize a function func using the L-BFGS-B algorithm.
+
+     Parameters
+     ----------
+     func : callable f(x,*args)
+         Function to minimize.
+     x0 : ndarray
+         Initial guess.
+     fprime : callable fprime(x,*args), optional
+         The gradient of `func`. If None, then `func` returns the function
+         value and the gradient (``f, g = func(x, *args)``), unless
+         `approx_grad` is True in which case `func` returns only ``f``.
+     args : sequence, optional
+         Arguments to pass to `func` and `fprime`.
+     approx_grad : bool, optional
+         Whether to approximate the gradient numerically (in which case
+         `func` returns only the function value).
+     bounds : list, optional
+         ``(min, max)`` pairs for each element in ``x``, defining
+         the bounds on that parameter. Use None or +-inf for one of ``min`` or
+         ``max`` when there is no bound in that direction.
+     m : int, optional
+         The maximum number of variable metric corrections
+         used to define the limited memory matrix. (The limited memory BFGS
+         method does not store the full Hessian but uses this many terms in an
+         approximation to it.)
+     factr : float, optional
+         The iteration stops when
+         ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
+         where ``eps`` is the machine precision, which is automatically
+         generated by the code. Typical values for `factr` are: 1e12 for
+         low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
+         high accuracy. See Notes for relationship to `ftol`, which is exposed
+         (instead of `factr`) by the `scipy.optimize.minimize` interface to
+         L-BFGS-B.
+     pgtol : float, optional
+         The iteration will stop when
+         ``max{|proj g_i | i = 1, ..., n} <= pgtol``
+         where ``proj g_i`` is the i-th component of the projected gradient.
+     epsilon : float, optional
+         Step size used when `approx_grad` is True, for numerically
+         calculating the gradient.
+     iprint : int, optional
+         Controls the frequency of output. ``iprint < 0`` means no output;
+         ``iprint = 0`` print only one line at the last iteration;
+         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
+         ``iprint = 99`` print details of every iteration except n-vectors;
+         ``iprint = 100`` print also the changes of active set and final x;
+         ``iprint > 100`` print details of every iteration including x and g.
+     disp : int, optional
+         If zero, then no output. If a positive number, then this overrides
+         `iprint` (i.e., `iprint` gets the value of `disp`).
+     maxfun : int, optional
+         Maximum number of function evaluations. Note that this function
+         may violate the limit because of evaluating gradients by numerical
+         differentiation.
+     maxiter : int, optional
+         Maximum number of iterations.
+     callback : callable, optional
+         Called after each iteration, as ``callback(xk)``, where ``xk`` is the
+         current parameter vector.
+     maxls : int, optional
+         Maximum number of line search steps (per iteration). Default is 20.
+
+     Returns
+     -------
+     x : array_like
+         Estimated position of the minimum.
+     f : float
+         Value of `func` at the minimum.
+     d : dict
+         Information dictionary.
+
+         * d['warnflag'] is
+
+           - 0 if converged,
+           - 1 if too many function evaluations or too many iterations,
+           - 2 if stopped for another reason, given in d['task']
+
+         * d['grad'] is the gradient at the minimum (should be approximately 0)
+         * d['funcalls'] is the number of function calls made.
+         * d['nit'] is the number of iterations.
+
+     See also
+     --------
+     minimize: Interface to minimization algorithms for multivariate
+         functions. See the 'L-BFGS-B' `method` in particular. Note that the
+         `ftol` option is made available via that interface, while `factr` is
+         provided via this interface, where `factr` is the factor multiplying
+         the default machine floating-point precision to arrive at `ftol`:
+         ``ftol = factr * numpy.finfo(float).eps``.
+
+     Notes
+     -----
+     License of L-BFGS-B (FORTRAN code):
+
+     The version included here (in fortran code) is 3.0
+     (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
+     and Jorge Nocedal <[email protected]>. It carries the following
+     condition for use:
+
+     This software is freely available, but we expect that all publications
+     describing work using this software, or all commercial products using it,
+     quote at least one of the references given below. This software is released
+     under the BSD License.
+
+     References
+     ----------
+     * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
+       Constrained Optimization, (1995), SIAM Journal on Scientific and
+       Statistical Computing, 16, 5, pp. 1190-1208.
+     * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
+       FORTRAN routines for large scale bound constrained optimization (1997),
+       ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
+     * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
+       FORTRAN routines for large scale bound constrained optimization (2011),
+       ACM Transactions on Mathematical Software, 38, 1.
+
+     Examples
+     --------
+     Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we define
+     an objective function ``f(m, b) = (y - y_model)**2``, where `y` describes the
+     observations and `y_model` the prediction of the linear model as
+     ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``, are arbitrarily
+     chosen as ``(0,5)`` and ``(5,10)`` for this example.
+
+     >>> import numpy as np
+     >>> from scipy.optimize import fmin_l_bfgs_b
+     >>> X = np.arange(0, 10, 1)
+     >>> M = 2
+     >>> B = 3
+     >>> Y = M * X + B
+     >>> def func(parameters, *args):
+     ...     x = args[0]
+     ...     y = args[1]
+     ...     m, b = parameters
+     ...     y_model = m*x + b
+     ...     error = sum(np.power((y - y_model), 2))
+     ...     return error
+
+     >>> initial_values = np.array([0.0, 1.0])
+
+     >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+     ...                                    approx_grad=True)
+     >>> x_opt, f_opt
+     (array([1.99999999, 3.00000006]), 1.7746231151323805e-14)  # may vary
+
+     The optimized parameters in ``x_opt`` agree with the ground truth parameters
+     ``m`` and ``b``. Next, let us perform a bound constrained optimization using
+     the `bounds` parameter.
+
+     >>> bounds = [(0, 5), (5, 10)]
+     >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+     ...                                    approx_grad=True, bounds=bounds)
+     >>> x_opt, f_opt
+     (array([1.65990508, 5.31649385]), 15.721334516453945)  # may vary
+     """
+     # handle fprime/approx_grad
+     if approx_grad:
+         fun = func
+         jac = None
+     elif fprime is None:
+         fun = MemoizeJac(func)
+         jac = fun.derivative
+     else:
+         fun = func
+         jac = fprime
+
+     # build options
+     callback = _wrap_callback(callback)
+     opts = {'disp': disp,
+             'iprint': iprint,
+             'maxcor': m,
+             'ftol': factr * np.finfo(float).eps,
+             'gtol': pgtol,
+             'eps': epsilon,
+             'maxfun': maxfun,
+             'maxiter': maxiter,
+             'callback': callback,
+             'maxls': maxls}
+
+     res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
+                            **opts)
+     d = {'grad': res['jac'],
+          'task': res['message'],
+          'funcalls': res['nfev'],
+          'nit': res['nit'],
+          'warnflag': res['status']}
+     f = res['fun']
+     x = res['x']
+
+     return x, f, d
+
+
+ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
+                      disp=None, maxcor=10, ftol=2.2204460492503131e-09,
+                      gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
+                      iprint=-1, callback=None, maxls=20,
+                      finite_diff_rel_step=None, **unknown_options):
+     """
+     Minimize a scalar function of one or more variables using the L-BFGS-B
+     algorithm.
+
+     Options
+     -------
+     disp : None or int
+         If `disp is None` (the default), then the supplied version of `iprint`
+         is used. If `disp is not None`, then it overrides the supplied version
+         of `iprint`, with the behaviour described under `iprint`.
+     maxcor : int
+         The maximum number of variable metric corrections used to
+         define the limited memory matrix. (The limited memory BFGS
+         method does not store the full Hessian but uses this many terms
+         in an approximation to it.)
+     ftol : float
+         The iteration stops when ``(f^k -
+         f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
+     gtol : float
+         The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
+         <= gtol`` where ``proj g_i`` is the i-th component of the
+         projected gradient.
+     eps : float or ndarray
+         If `jac is None` the absolute step size used for numerical
+         approximation of the jacobian via forward differences.
+     maxfun : int
+         Maximum number of function evaluations. Note that this function
+         may violate the limit because of evaluating gradients by numerical
+         differentiation.
+     maxiter : int
+         Maximum number of iterations.
+     iprint : int, optional
+         Controls the frequency of output. ``iprint < 0`` means no output;
+         ``iprint = 0`` print only one line at the last iteration;
+         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
+         ``iprint = 99`` print details of every iteration except n-vectors;
+         ``iprint = 100`` print also the changes of active set and final x;
+         ``iprint > 100`` print details of every iteration including x and g.
+     maxls : int, optional
+         Maximum number of line search steps (per iteration). Default is 20.
+     finite_diff_rel_step : None or array_like, optional
+         If `jac in ['2-point', '3-point', 'cs']` the relative step size to
+         use for numerical approximation of the jacobian. The absolute step
+         size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
+         possibly adjusted to fit into the bounds. For ``method='3-point'``
+         the sign of `h` is ignored. If None (default) then step is selected
+         automatically.
+
+     Notes
+     -----
+     The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
+     but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
+     relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
+     I.e., `factr` multiplies the default machine floating-point precision to
+     arrive at `ftol`.
+
+     """
+     _check_unknown_options(unknown_options)
+     m = maxcor
+     pgtol = gtol
+     factr = ftol / np.finfo(float).eps
+
+     x0 = asarray(x0).ravel()
+     n, = x0.shape
+
+     # historically old-style bounds were/are expected by lbfgsb.
+     # That's still the case but we'll deal with new-style from here on,
+     # it's easier
+     if bounds is None:
+         pass
+     elif len(bounds) != n:
+         raise ValueError('length of x0 != length of bounds')
+     else:
+         bounds = np.array(old_bound_to_new(bounds))
+
+         # check bounds
+         if (bounds[0] > bounds[1]).any():
+             raise ValueError(
+                 "LBFGSB - one of the lower bounds is greater than an upper bound."
+             )
+
+         # initial vector must lie within the bounds. Otherwise ScalarFunction and
+         # approx_derivative will cause problems
+         x0 = np.clip(x0, bounds[0], bounds[1])
+
+     if disp is not None:
+         if disp == 0:
+             iprint = -1
+         else:
+             iprint = disp
+
+     # _prepare_scalar_function can use bounds=None to represent no bounds
+     sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
+                                   bounds=bounds,
+                                   finite_diff_rel_step=finite_diff_rel_step)
+
+     func_and_grad = sf.fun_and_grad
+
+     fortran_int = _lbfgsb.types.intvar.dtype
+
+     nbd = zeros(n, fortran_int)
+     low_bnd = zeros(n, float64)
+     upper_bnd = zeros(n, float64)
+     bounds_map = {(-np.inf, np.inf): 0,
+                   (1, np.inf): 1,
+                   (1, 1): 2,
+                   (-np.inf, 1): 3}
+
+     if bounds is not None:
+         for i in range(0, n):
+             l, u = bounds[0, i], bounds[1, i]
+             if not np.isinf(l):
+                 low_bnd[i] = l
+                 l = 1
+             if not np.isinf(u):
+                 upper_bnd[i] = u
+                 u = 1
+             nbd[i] = bounds_map[l, u]
+
+     if not maxls > 0:
+         raise ValueError('maxls must be positive.')
+
+     x = array(x0, float64)
+     f = array(0.0, float64)
+     g = zeros((n,), float64)
+     wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
+     iwa = zeros(3*n, fortran_int)
+     task = zeros(1, 'S60')
+     csave = zeros(1, 'S60')
+     lsave = zeros(4, fortran_int)
+     isave = zeros(44, fortran_int)
+     dsave = zeros(29, float64)
+
+     task[:] = 'START'
+
+     n_iterations = 0
+
+     while 1:
+         # g may become float32 if a user provides a function that calculates
+         # the Jacobian in float32 (see gh-18730). The underlying Fortran code
+         # expects float64, so upcast it
+         g = g.astype(np.float64)
+         # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
+         _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
+                        pgtol, wa, iwa, task, iprint, csave, lsave,
+                        isave, dsave, maxls)
+         task_str = task.tobytes()
+         if task_str.startswith(b'FG'):
+             # The minimization routine wants f and g at the current x.
+             # Note that interruptions due to maxfun are postponed
+             # until the completion of the current minimization iteration.
+             # Overwrite f and g:
+             f, g = func_and_grad(x)
+         elif task_str.startswith(b'NEW_X'):
+             # new iteration
+             n_iterations += 1
+
+             intermediate_result = OptimizeResult(x=x, fun=f)
+             if _call_callback_maybe_halt(callback, intermediate_result):
+                 task[:] = 'STOP: CALLBACK REQUESTED HALT'
+             if n_iterations >= maxiter:
+                 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
+             elif sf.nfev > maxfun:
+                 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
+                            'EXCEEDS LIMIT')
+         else:
+             break
+
+     task_str = task.tobytes().strip(b'\x00').strip()
+     if task_str.startswith(b'CONV'):
+         warnflag = 0
+     elif sf.nfev > maxfun or n_iterations >= maxiter:
+         warnflag = 1
+     else:
+         warnflag = 2
+
+     # These two portions of the workspace are described in the mainlb
+     # subroutine in lbfgsb.f. See line 363.
+     s = wa[0: m*n].reshape(m, n)
+     y = wa[m*n: 2*m*n].reshape(m, n)
+
+     # See lbfgsb.f line 160 for this portion of the workspace.
+     # isave(31) = the total number of BFGS updates prior to the current iteration;
+     n_bfgs_updates = isave[30]
+
+     n_corrs = min(n_bfgs_updates, maxcor)
+     hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
+
+     task_str = task_str.decode()
+     return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
+                           njev=sf.ngev,
+                           nit=n_iterations, status=warnflag, message=task_str,
+                           x=x, success=(warnflag == 0), hess_inv=hess_inv)
+
+
+ class LbfgsInvHessProduct(LinearOperator):
+     """Linear operator for the L-BFGS approximate inverse Hessian.
+
+     This operator computes the product of a vector with the approximate inverse
+     of the Hessian of the objective function, using the L-BFGS limited
+     memory approximation to the inverse Hessian, accumulated during the
+     optimization.
+
+     Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
+     interface.
+
+     Parameters
+     ----------
+     sk : array_like, shape=(n_corr, n)
+         Array of `n_corr` most recent updates to the solution vector.
+         (See [1]).
+     yk : array_like, shape=(n_corr, n)
+         Array of `n_corr` most recent updates to the gradient. (See [1]).
+
+     References
+     ----------
+     .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
+        storage." Mathematics of computation 35.151 (1980): 773-782.
+
+     """
+
+     def __init__(self, sk, yk):
+         """Construct the operator."""
+         if sk.shape != yk.shape or sk.ndim != 2:
+             raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
+         n_corrs, n = sk.shape
+
+         super().__init__(dtype=np.float64, shape=(n, n))
+
+         self.sk = sk
+         self.yk = yk
+         self.n_corrs = n_corrs
+         self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
+
+     def _matvec(self, x):
+         """Efficient matrix-vector multiply with the BFGS matrices.
+
+         This calculation is described in Section (4) of [1].
+
+         Parameters
+         ----------
+         x : ndarray
+             An array with shape (n,) or (n,1).
+
+         Returns
+         -------
+         y : ndarray
+             The matrix-vector product
+
+         """
+         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+         q = np.array(x, dtype=self.dtype, copy=True)
+         if q.ndim == 2 and q.shape[1] == 1:
+             q = q.reshape(-1)
+
+         alpha = np.empty(n_corrs)
+
+         # First loop of the two-loop recursion (most recent pair first).
+         for i in range(n_corrs-1, -1, -1):
+             alpha[i] = rho[i] * np.dot(s[i], q)
+             q = q - alpha[i]*y[i]
+
+         r = q
+         # Second loop (oldest pair first).
+         for i in range(n_corrs):
+             beta = rho[i] * np.dot(y[i], r)
+             r = r + s[i] * (alpha[i] - beta)
+
+         return r
+
+     def todense(self):
+         """Return a dense array representation of this operator.
+
+         Returns
+         -------
+         arr : ndarray, shape=(n, n)
+             An array with the same shape and containing
+             the same data represented by this `LinearOperator`.
+
+         """
+         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+         I = np.eye(*self.shape, dtype=self.dtype)
+         Hk = I
+
+         for i in range(n_corrs):
+             A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
+             A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
+
+             Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
+                                                s[i][np.newaxis, :])
+         return Hk
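
The two docstrings above both state the relation ``ftol = factr * numpy.finfo(float).eps``, and `_minimize_lbfgsb` returns its inverse-Hessian approximation as a `LbfgsInvHessProduct`. A small sketch of both points, using only public `scipy.optimize` APIs (`rosen`/`rosen_der` as a stock test problem); it is an illustration, not part of the file:

import numpy as np
from scipy.optimize import minimize, fmin_l_bfgs_b, rosen, rosen_der

x0 = np.array([-1.2, 1.0])
factr = 1e7

# fmin_l_bfgs_b takes factr; minimize(method='L-BFGS-B') takes ftol.
x_opt, f_opt, info = fmin_l_bfgs_b(rosen, x0, fprime=rosen_der, factr=factr)
res = minimize(rosen, x0, jac=rosen_der, method='L-BFGS-B',
               options={'ftol': factr * np.finfo(float).eps})
assert np.allclose(x_opt, res.x, atol=1e-6)

# hess_inv is a LinearOperator built from the stored (s, y) pairs: matvec
# runs the two-loop recursion, while todense() expands the same operator.
v = np.ones(2)
assert np.allclose(res.hess_inv.matvec(v), res.hess_inv.todense() @ v)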
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py ADDED
@@ -0,0 +1,896 @@
+ """
+ Functions
+ ---------
+ .. autosummary::
+    :toctree: generated/
+
+    line_search_armijo
+    line_search_wolfe1
+    line_search_wolfe2
+    scalar_search_wolfe1
+    scalar_search_wolfe2
+
+ """
+ from warnings import warn
+
+ from ._dcsrch import DCSRCH
+ import numpy as np
+
+ __all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
+            'scalar_search_wolfe1', 'scalar_search_wolfe2',
+            'line_search_armijo']
+
+
+ class LineSearchWarning(RuntimeWarning):
+     pass
+
+
+ def _check_c1_c2(c1, c2):
+     if not (0 < c1 < c2 < 1):
+         raise ValueError("'c1' and 'c2' do not satisfy "
+                          "'0 < c1 < c2 < 1'.")
+
+
+ #------------------------------------------------------------------------------
+ # Minpack's Wolfe line and scalar searches
+ #------------------------------------------------------------------------------
+
+ def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
+                        old_fval=None, old_old_fval=None,
+                        args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
+                        xtol=1e-14):
+     """
+     As `scalar_search_wolfe1` but do a line search to direction `pk`
+
+     Parameters
+     ----------
+     f : callable
+         Function `f(x)`
+     fprime : callable
+         Gradient of `f`
+     xk : array_like
+         Current point
+     pk : array_like
+         Search direction
+     gfk : array_like, optional
+         Gradient of `f` at point `xk`
+     old_fval : float, optional
+         Value of `f` at point `xk`
+     old_old_fval : float, optional
+         Value of `f` at point preceding `xk`
+
+     The rest of the parameters are the same as for `scalar_search_wolfe1`.
+
+     Returns
+     -------
+     stp, f_count, g_count, fval, old_fval
+         As in `scalar_search_wolfe1`
+     gval : array
+         Gradient of `f` at the final point
+
+     Notes
+     -----
+     Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
+
+     """
+     if gfk is None:
+         gfk = fprime(xk, *args)
+
+     gval = [gfk]
+     gc = [0]
+     fc = [0]
+
+     def phi(s):
+         fc[0] += 1
+         return f(xk + s*pk, *args)
+
+     def derphi(s):
+         gval[0] = fprime(xk + s*pk, *args)
+         gc[0] += 1
+         return np.dot(gval[0], pk)
+
+     derphi0 = np.dot(gfk, pk)
+
+     stp, fval, old_fval = scalar_search_wolfe1(
+             phi, derphi, old_fval, old_old_fval, derphi0,
+             c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
+
+     return stp, fc[0], gc[0], fval, old_fval, gval[0]
+
+
+ def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
+                          c1=1e-4, c2=0.9,
+                          amax=50, amin=1e-8, xtol=1e-14):
+     """
+     Scalar function search for alpha that satisfies strong Wolfe conditions
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Parameters
+     ----------
+     phi : callable phi(alpha)
+         Function at point `alpha`
+     derphi : callable phi'(alpha)
+         Objective function derivative. Returns a scalar.
+     phi0 : float, optional
+         Value of phi at 0
+     old_phi0 : float, optional
+         Value of phi at previous point
+     derphi0 : float, optional
+         Value of derphi at 0
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax, amin : float, optional
+         Maximum and minimum step size
+     xtol : float, optional
+         Relative tolerance for an acceptable step.
+
+     Returns
+     -------
+     alpha : float
+         Step size, or None if no suitable step was found
+     phi : float
+         Value of `phi` at the new point `alpha`
+     phi0 : float
+         Value of `phi` at `alpha=0`
+
+     Notes
+     -----
+     Uses routine DCSRCH from MINPACK.
+
+     Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described in [1]_.
+
+     References
+     ----------
+
+     .. [1] Nocedal, J., & Wright, S. J. (2006). Numerical optimization.
+        In Springer Series in Operations Research and Financial Engineering.
+        (Springer Series in Operations Research and Financial Engineering).
+        Springer Nature.
+
+     """
+     _check_c1_c2(c1, c2)
+
+     if phi0 is None:
+         phi0 = phi(0.)
+     if derphi0 is None:
+         derphi0 = derphi(0.)
+
+     if old_phi0 is not None and derphi0 != 0:
+         alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+         if alpha1 < 0:
+             alpha1 = 1.0
+     else:
+         alpha1 = 1.0
+
+     maxiter = 100
+
+     dcsrch = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
+     stp, phi1, phi0, task = dcsrch(
+         alpha1, phi0=phi0, derphi0=derphi0, maxiter=maxiter
+     )
+
+     return stp, phi1, phi0
+
+
+ line_search = line_search_wolfe1
+
+
+ #------------------------------------------------------------------------------
+ # Pure-Python Wolfe line and scalar searches
+ #------------------------------------------------------------------------------
+
+ # Note: `line_search_wolfe2` is the public `scipy.optimize.line_search`
+
+ def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
+                        old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
+                        extra_condition=None, maxiter=10):
+     """Find alpha that satisfies strong Wolfe conditions.
+
+     Parameters
+     ----------
+     f : callable f(x,*args)
+         Objective function.
+     myfprime : callable f'(x,*args)
+         Objective function gradient.
+     xk : ndarray
+         Starting point.
+     pk : ndarray
+         Search direction. The search direction must be a descent direction
+         for the algorithm to converge.
+     gfk : ndarray, optional
+         Gradient value for x=xk (xk being the current parameter
+         estimate). Will be recomputed if omitted.
+     old_fval : float, optional
+         Function value for x=xk. Will be recomputed if omitted.
+     old_old_fval : float, optional
+         Function value for the point preceding x=xk.
+     args : tuple, optional
+         Additional arguments passed to objective function.
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax : float, optional
+         Maximum step size
+     extra_condition : callable, optional
+         A callable of the form ``extra_condition(alpha, x, f, g)``
+         returning a boolean. Arguments are the proposed step ``alpha``
+         and the corresponding ``x``, ``f`` and ``g`` values. The line search
+         accepts the value of ``alpha`` only if this
+         callable returns ``True``. If the callable returns ``False``
+         for the step length, the algorithm will continue with
+         new iterates. The callable is only called for iterates
+         satisfying the strong Wolfe conditions.
+     maxiter : int, optional
+         Maximum number of iterations to perform.
+
+     Returns
+     -------
+     alpha : float or None
+         Alpha for which ``x_new = x0 + alpha * pk``,
+         or None if the line search algorithm did not converge.
+     fc : int
+         Number of function evaluations made.
+     gc : int
+         Number of gradient evaluations made.
+     new_fval : float or None
+         New function value ``f(x_new)=f(x0+alpha*pk)``,
+         or None if the line search algorithm did not converge.
+     old_fval : float
+         Old function value ``f(x0)``.
+     new_slope : float or None
+         The local slope along the search direction at the
+         new value ``<myfprime(x_new), pk>``,
+         or None if the line search algorithm did not converge.
+
+
+     Notes
+     -----
+     Uses the line search algorithm to enforce strong Wolfe
+     conditions. See Wright and Nocedal, 'Numerical Optimization',
+     1999, pp. 59-61.
+
+     The search direction `pk` must be a descent direction (e.g.
+     ``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
+     conditions. If the search direction is not a descent direction (e.g.
+     ``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from scipy.optimize import line_search
+
+     An objective function and its gradient are defined.
+
+     >>> def obj_func(x):
+     ...     return (x[0])**2+(x[1])**2
+     >>> def obj_grad(x):
+     ...     return [2*x[0], 2*x[1]]
+
+     We can find alpha that satisfies strong Wolfe conditions.
+
+     >>> start_point = np.array([1.8, 1.7])
+     >>> search_gradient = np.array([-1.0, -1.0])
+     >>> line_search(obj_func, obj_grad, start_point, search_gradient)
+     (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
+
+     """
+     fc = [0]
+     gc = [0]
+     gval = [None]
+     gval_alpha = [None]
+
+     def phi(alpha):
+         fc[0] += 1
+         return f(xk + alpha * pk, *args)
+
+     fprime = myfprime
+
+     def derphi(alpha):
+         gc[0] += 1
+         gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
+         gval_alpha[0] = alpha
+         return np.dot(gval[0], pk)
+
+     if gfk is None:
+         gfk = fprime(xk, *args)
+     derphi0 = np.dot(gfk, pk)
+
+     if extra_condition is not None:
+         # Add the current gradient as argument, to avoid needless
+         # re-evaluation
+         def extra_condition2(alpha, phi):
+             if gval_alpha[0] != alpha:
+                 derphi(alpha)
+             x = xk + alpha * pk
+             return extra_condition(alpha, x, phi, gval[0])
+     else:
+         extra_condition2 = None
+
+     alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
+             phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
+             extra_condition2, maxiter=maxiter)
+
+     if derphi_star is None:
+         warn('The line search algorithm did not converge',
+              LineSearchWarning, stacklevel=2)
+     else:
+         # derphi_star is a number (derphi) -- so use the most recently
+         # calculated gradient used in computing it, derphi = gfk*pk.
+         # This is the gradient at the next step; no need to compute it
+         # again in the outer loop.
+         derphi_star = gval[0]
+
+     return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
+
+
+ def scalar_search_wolfe2(phi, derphi, phi0=None,
+                          old_phi0=None, derphi0=None,
+                          c1=1e-4, c2=0.9, amax=None,
+                          extra_condition=None, maxiter=10):
+     """Find alpha that satisfies strong Wolfe conditions.
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Parameters
+     ----------
+     phi : callable phi(alpha)
+         Objective scalar function.
+     derphi : callable phi'(alpha)
+         Objective function derivative. Returns a scalar.
+     phi0 : float, optional
+         Value of phi at 0.
+     old_phi0 : float, optional
+         Value of phi at previous point.
+     derphi0 : float, optional
+         Value of derphi at 0
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax : float, optional
+         Maximum step size.
+     extra_condition : callable, optional
+         A callable of the form ``extra_condition(alpha, phi_value)``
+         returning a boolean. The line search accepts the value
+         of ``alpha`` only if this callable returns ``True``.
+         If the callable returns ``False`` for the step length,
+         the algorithm will continue with new iterates.
+         The callable is only called for iterates satisfying
+         the strong Wolfe conditions.
+     maxiter : int, optional
+         Maximum number of iterations to perform.
+
+     Returns
+     -------
+     alpha_star : float or None
+         Best alpha, or None if the line search algorithm did not converge.
+     phi_star : float
+         phi at alpha_star.
+     phi0 : float
+         phi at 0.
+     derphi_star : float or None
+         derphi at alpha_star, or None if the line search algorithm
+         did not converge.
+
+     Notes
+     -----
+     Uses the line search algorithm to enforce strong Wolfe
+     conditions. See Wright and Nocedal, 'Numerical Optimization',
+     1999, pp. 59-61.
+
+     """
+     _check_c1_c2(c1, c2)
+
+     if phi0 is None:
+         phi0 = phi(0.)
+
+     if derphi0 is None:
+         derphi0 = derphi(0.)
+
+     alpha0 = 0
+     if old_phi0 is not None and derphi0 != 0:
+         alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+     else:
+         alpha1 = 1.0
+
+     if alpha1 < 0:
+         alpha1 = 1.0
+
+     if amax is not None:
+         alpha1 = min(alpha1, amax)
+
+     phi_a1 = phi(alpha1)
+     #derphi_a1 = derphi(alpha1) evaluated below
+
+     phi_a0 = phi0
+     derphi_a0 = derphi0
+
+     if extra_condition is None:
+         def extra_condition(alpha, phi):
+             return True
+
+     for i in range(maxiter):
+         if alpha1 == 0 or (amax is not None and alpha0 > amax):
+             # alpha1 == 0: This shouldn't happen. Perhaps the increment has
+             # slipped below machine precision?
+             alpha_star = None
+             phi_star = phi0
+             phi0 = old_phi0
+             derphi_star = None
+
+             if alpha1 == 0:
+                 msg = 'Rounding errors prevent the line search from converging'
+             else:
+                 msg = "The line search algorithm could not find a solution " + \
+                       "less than or equal to amax: %s" % amax
+
+             warn(msg, LineSearchWarning, stacklevel=2)
+             break
+
+         not_first_iteration = i > 0
+         if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
+            ((phi_a1 >= phi_a0) and not_first_iteration):
+             alpha_star, phi_star, derphi_star = \
+                         _zoom(alpha0, alpha1, phi_a0,
+                               phi_a1, derphi_a0, phi, derphi,
+                               phi0, derphi0, c1, c2, extra_condition)
+             break
+
+         derphi_a1 = derphi(alpha1)
+         if (abs(derphi_a1) <= -c2*derphi0):
+             if extra_condition(alpha1, phi_a1):
+                 alpha_star = alpha1
+                 phi_star = phi_a1
+                 derphi_star = derphi_a1
+                 break
+
+         if (derphi_a1 >= 0):
+             alpha_star, phi_star, derphi_star = \
+                         _zoom(alpha1, alpha0, phi_a1,
+                               phi_a0, derphi_a1, phi, derphi,
+                               phi0, derphi0, c1, c2, extra_condition)
+             break
+
+         alpha2 = 2 * alpha1  # increase by factor of two on each iteration
+         if amax is not None:
+             alpha2 = min(alpha2, amax)
+         alpha0 = alpha1
+         alpha1 = alpha2
+         phi_a0 = phi_a1
+         phi_a1 = phi(alpha1)
+         derphi_a0 = derphi_a1
+
+     else:
+         # stopping test maxiter reached
+         alpha_star = alpha1
+         phi_star = phi_a1
+         derphi_star = None
+         warn('The line search algorithm did not converge',
+              LineSearchWarning, stacklevel=2)
+
+     return alpha_star, phi_star, phi0, derphi_star
+
+
+ def _cubicmin(a, fa, fpa, b, fb, c, fc):
+     """
+     Finds the minimizer for a cubic polynomial that goes through the
+     points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
+
+     If no minimizer can be found, return None.
+
+     """
+     # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
+
+     with np.errstate(divide='raise', over='raise', invalid='raise'):
+         try:
+             C = fpa
+             db = b - a
+             dc = c - a
+             denom = (db * dc) ** 2 * (db - dc)
+             d1 = np.empty((2, 2))
+             d1[0, 0] = dc ** 2
+             d1[0, 1] = -db ** 2
+             d1[1, 0] = -dc ** 3
+             d1[1, 1] = db ** 3
+             [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
+                                             fc - fa - C * dc]).flatten())
+             A /= denom
+             B /= denom
+             radical = B * B - 3 * A * C
+             xmin = a + (-B + np.sqrt(radical)) / (3 * A)
+         except ArithmeticError:
+             return None
+     if not np.isfinite(xmin):
+         return None
+     return xmin
+
+
+ def _quadmin(a, fa, fpa, b, fb):
+     """
+     Finds the minimizer for a quadratic polynomial that goes through
+     the points (a,fa), (b,fb) with derivative at a of fpa.
+
+     """
+     # f(x) = B*(x-a)^2 + C*(x-a) + D
+     with np.errstate(divide='raise', over='raise', invalid='raise'):
+         try:
+             D = fa
+             C = fpa
+             db = b - a * 1.0
+             B = (fb - D - C * db) / (db * db)
+             xmin = a - C / (2.0 * B)
+         except ArithmeticError:
+             return None
+     if not np.isfinite(xmin):
+         return None
+     return xmin
+
+
+ def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
+           phi, derphi, phi0, derphi0, c1, c2, extra_condition):
+     """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.
+
+     Part of the optimization algorithm in `scalar_search_wolfe2`.
+
+     Notes
+     -----
+     Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
+     'Numerical Optimization', 1999, pp. 61.
+
+     """
+
+     maxiter = 10
+     i = 0
+     delta1 = 0.2  # cubic interpolant check
+     delta2 = 0.1  # quadratic interpolant check
+     phi_rec = phi0
+     a_rec = 0
+     while True:
+         # interpolate to find a trial step length between a_lo and a_hi.
+         # We need to choose the interpolation here: use cubic
+         # interpolation, and then if the result is within delta *
+         # dalpha or outside of the interval bounded by a_lo or a_hi
+         # then use quadratic interpolation; if the result is still too
+         # close, then use bisection
+
+         dalpha = a_hi - a_lo
+         if dalpha < 0:
+             a, b = a_hi, a_lo
+         else:
+             a, b = a_lo, a_hi
+
+         # minimizer of cubic interpolant
+         # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
+         #
+         # if the result is too close to the end points (or out of the
+         # interval), then use quadratic interpolation with phi_lo,
+         # derphi_lo and phi_hi; if the result is still too close to the
+         # end points (or out of the interval) then use bisection
+
+         if (i > 0):
+             cchk = delta1 * dalpha
+             a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
+                             a_rec, phi_rec)
+         if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
+             qchk = delta2 * dalpha
+             a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
+             if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
+                 a_j = a_lo + 0.5*dalpha
+
+         # Check new value of a_j
+
+         phi_aj = phi(a_j)
+         if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
+             phi_rec = phi_hi
+             a_rec = a_hi
+             a_hi = a_j
+             phi_hi = phi_aj
+         else:
+             derphi_aj = derphi(a_j)
+             if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
+                 a_star = a_j
+                 val_star = phi_aj
+                 valprime_star = derphi_aj
+                 break
+             if derphi_aj*(a_hi - a_lo) >= 0:
+                 phi_rec = phi_hi
+                 a_rec = a_hi
+                 a_hi = a_lo
+                 phi_hi = phi_lo
+             else:
+                 phi_rec = phi_lo
+                 a_rec = a_lo
+             a_lo = a_j
+             phi_lo = phi_aj
+             derphi_lo = derphi_aj
+         i += 1
+         if (i > maxiter):
+             # Failed to find a conforming step size
+             a_star = None
+             val_star = None
+             valprime_star = None
+             break
+     return a_star, val_star, valprime_star
+
+
+ #------------------------------------------------------------------------------
+ # Armijo line and scalar searches
+ #------------------------------------------------------------------------------
+
+ def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+     """Minimize over alpha, the function ``f(xk + alpha*pk)``.
+
+     Parameters
+     ----------
+     f : callable
+         Function to be minimized.
+     xk : array_like
+         Current point.
+     pk : array_like
+         Search direction.
+     gfk : array_like
+         Gradient of `f` at point `xk`.
+     old_fval : float
+         Value of `f` at point `xk`.
+     args : tuple, optional
+         Optional arguments.
+     c1 : float, optional
+         Value to control stopping criterion.
+     alpha0 : scalar, optional
+         Value of `alpha` at start of the optimization.
+
+     Returns
+     -------
+     alpha
+     f_count
+     f_val_at_alpha
+
+     Notes
+     -----
+     Uses the interpolation algorithm (Armijo backtracking) as suggested by
+     Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57
+
+     """
+     xk = np.atleast_1d(xk)
+     fc = [0]
+
+     def phi(alpha1):
+         fc[0] += 1
+         return f(xk + alpha1*pk, *args)
+
+     if old_fval is None:
+         phi0 = phi(0.)
+     else:
+         phi0 = old_fval  # compute f(xk) -- done in past loop
+
+     derphi0 = np.dot(gfk, pk)
+     alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
+                                        alpha0=alpha0)
+     return alpha, fc[0], phi1
+
+
+ def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+     """
+     Compatibility wrapper for `line_search_armijo`
+     """
+     r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
+                            alpha0=alpha0)
+     return r[0], r[1], 0, r[2]
+
+
+ def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
+     """Minimize over alpha, the function ``phi(alpha)``.
+
+     Uses the interpolation algorithm (Armijo backtracking) as suggested by
+     Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Returns
+     -------
+     alpha
+     phi1
+
+     """
+     phi_a0 = phi(alpha0)
+     if phi_a0 <= phi0 + c1*alpha0*derphi0:
+         return alpha0, phi_a0
+
+     # Otherwise, compute the minimizer of a quadratic interpolant:
+
+     alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
+     phi_a1 = phi(alpha1)
+
+     if (phi_a1 <= phi0 + c1*alpha1*derphi0):
+         return alpha1, phi_a1
+
+     # Otherwise, loop with cubic interpolation until we find an alpha which
+     # satisfies the first Wolfe condition (since we are backtracking, we will
+     # assume that the value of alpha is not too small and satisfies the second
+     # condition).
+
+     while alpha1 > amin:  # we are assuming alpha>0 is a descent direction
+         factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
+         a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
+             alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
+         a = a / factor
+         b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
+             alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
+         b = b / factor
+
+         alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
+         phi_a2 = phi(alpha2)
+
+         if (phi_a2 <= phi0 + c1*alpha2*derphi0):
+             return alpha2, phi_a2
+
+         if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
+             alpha2 = alpha1 / 2.0
+
+         alpha0 = alpha1
+         alpha1 = alpha2
+         phi_a0 = phi_a1
+         phi_a1 = phi_a2
+
+     # Failed to find a suitable step length
+     return None, phi_a1
+
+
+ #------------------------------------------------------------------------------
+ # Non-monotone line search for DF-SANE
+ #------------------------------------------------------------------------------
+
+ def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
+                                   gamma=1e-4, tau_min=0.1, tau_max=0.5):
+     """
+     Nonmonotone backtracking line search as described in [1]_
+
+     Parameters
+     ----------
+     f : callable
+         Function returning a tuple ``(f, F)`` where ``f`` is the value
+         of a merit function and ``F`` the residual.
+     x_k : ndarray
+         Initial position.
+     d : ndarray
+         Search direction.
+     prev_fs : list of float
+         List of previous merit function values. Should have ``len(prev_fs) <= M``
+         where ``M`` is the nonmonotonicity window parameter.
+     eta : float
+         Allowed merit function increase, see [1]_
+     gamma, tau_min, tau_max : float, optional
+         Search parameters, see [1]_
+
+     Returns
+     -------
+     alpha : float
+         Step length
+     xp : ndarray
+         Next position
+     fp : float
+         Merit function value at next position
+     Fp : ndarray
+         Residual at next position
+
+     References
+     ----------
+     .. [1] "Spectral residual method without gradient information for solving
+        large-scale nonlinear systems of equations." W. La Cruz,
+        J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
+
+     """
+     f_k = prev_fs[-1]
+     f_bar = max(prev_fs)
+
+     alpha_p = 1
+     alpha_m = 1
+     alpha = 1
+
+     while True:
+         xp = x_k + alpha_p * d
+         fp, Fp = f(xp)
+
+         if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
+             alpha = alpha_p
+             break
+
+         alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
+
+         xp = x_k - alpha_m * d
+         fp, Fp = f(xp)
+
+         if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
+             alpha = -alpha_m
+             break
+
+         alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
+
+         alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
+         alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
+
+     return alpha, xp, fp, Fp
+
+
+ def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
+                                    gamma=1e-4, tau_min=0.1, tau_max=0.5,
+                                    nu=0.85):
+     """
+     Nonmonotone line search from [1]_
+
+     Parameters
+     ----------
+     f : callable
+         Function returning a tuple ``(f, F)`` where ``f`` is the value
+         of a merit function and ``F`` the residual.
+     x_k : ndarray
+         Initial position.
+     d : ndarray
+         Search direction.
+     f_k : float
+         Initial merit function value.
+     C, Q : float
+         Control parameters. On the first iteration, give values
+         Q=1.0, C=f_k
+     eta : float
+         Allowed merit function increase, see [1]_
+     nu, gamma, tau_min, tau_max : float, optional
+         Search parameters, see [1]_
+
+     Returns
+     -------
+     alpha : float
+         Step length
+     xp : ndarray
+         Next position
+     fp : float
+         Merit function value at next position
+     Fp : ndarray
+         Residual at next position
+     C : float
+         New value for the control parameter C
+     Q : float
+         New value for the control parameter Q
+
+     References
+     ----------
+     .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
+        search and its application to the spectral residual
+        method'', IMA J. Numer. Anal. 29, 814 (2009).
+
+     """
+     alpha_p = 1
+     alpha_m = 1
+     alpha = 1
+
+     while True:
+         xp = x_k + alpha_p * d
+         fp, Fp = f(xp)
+
+         if fp <= C + eta - gamma * alpha_p**2 * f_k:
+             alpha = alpha_p
+             break
+
+         alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
+
+         xp = x_k - alpha_m * d
+         fp, Fp = f(xp)
+
+         if fp <= C + eta - gamma * alpha_m**2 * f_k:
+             alpha = -alpha_m
+             break
+
+         alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
+
+         alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
+         alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
+
+     # Update C and Q
+     Q_next = nu * Q + 1
+     C = (nu * Q * (C + eta) + fp) / Q_next
+     Q = Q_next
+
+     return alpha, xp, fp, Fp, C, Q
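
Per the module comment above, `line_search_wolfe2` is exported as the public `scipy.optimize.line_search`. A minimal sketch checking the two strong Wolfe conditions on a simple quadratic (the test function and tolerance values are illustrative assumptions, not from the file):

import numpy as np
from scipy.optimize import line_search

def f(x):
    return float(x @ x)

def grad(x):
    return 2 * x

xk = np.array([1.8, 1.7])
pk = -grad(xk)                  # a descent direction, as the docstring requires
alpha, fc, gc, fnew, fold, _ = line_search(f, grad, xk, pk)

c1, c2 = 1e-4, 0.9              # the defaults
slope0 = grad(xk) @ pk
gnew = grad(xk + alpha * pk)
assert fnew <= f(xk) + c1 * alpha * slope0     # sufficient decrease (Armijo)
assert abs(gnew @ pk) <= -c2 * slope0          # curvature condition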
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py ADDED
@@ -0,0 +1,716 @@
1
+ """
2
+ A top-level linear programming interface.
3
+
4
+ .. versionadded:: 0.15.0
5
+
6
+ Functions
7
+ ---------
8
+ .. autosummary::
9
+ :toctree: generated/
10
+
11
+ linprog
12
+ linprog_verbose_callback
13
+ linprog_terse_callback
14
+
15
+ """
16
+
17
+ import numpy as np
18
+
19
+ from ._optimize import OptimizeResult, OptimizeWarning
20
+ from warnings import warn
21
+ from ._linprog_highs import _linprog_highs
22
+ from ._linprog_ip import _linprog_ip
23
+ from ._linprog_simplex import _linprog_simplex
24
+ from ._linprog_rs import _linprog_rs
25
+ from ._linprog_doc import (_linprog_highs_doc, _linprog_ip_doc, # noqa: F401
26
+ _linprog_rs_doc, _linprog_simplex_doc,
27
+ _linprog_highs_ipm_doc, _linprog_highs_ds_doc)
28
+ from ._linprog_util import (
29
+ _parse_linprog, _presolve, _get_Abc, _LPProblem, _autoscale,
30
+ _postsolve, _check_result, _display_summary)
31
+ from copy import deepcopy
32
+
33
+ __all__ = ['linprog', 'linprog_verbose_callback', 'linprog_terse_callback']
34
+
35
+ __docformat__ = "restructuredtext en"
36
+
37
+ LINPROG_METHODS = [
38
+ 'simplex', 'revised simplex', 'interior-point', 'highs', 'highs-ds', 'highs-ipm'
39
+ ]
40
+
41
+
42
+ def linprog_verbose_callback(res):
43
+ """
44
+ A sample callback function demonstrating the linprog callback interface.
45
+ This callback produces detailed output to sys.stdout before each iteration
46
+ and after the final iteration of the simplex algorithm.
47
+
48
+ Parameters
49
+ ----------
50
+ res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
51
+
52
+ x : 1-D array
53
+ The independent variable vector which optimizes the linear
54
+ programming problem.
55
+ fun : float
56
+ Value of the objective function.
57
+ success : bool
58
+             True if the algorithm succeeded in finding an optimal solution.
+         slack : 1-D array
+             The values of the slack variables. Each slack variable corresponds
+             to an inequality constraint. If the slack is zero, then the
+             corresponding constraint is active.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints, that
+             is, ``b - A_eq @ x``.
+         phase : int
+             The phase of the optimization being executed. In phase 1 a basic
+             feasible solution is sought and the tableau ``T`` has an
+             additional row representing an alternate objective function.
+         status : int
+             An integer representing the exit status of the optimization::
+
+                 0 : Optimization terminated successfully
+                 1 : Iteration limit reached
+                 2 : Problem appears to be infeasible
+                 3 : Problem appears to be unbounded
+                 4 : Serious numerical difficulties encountered
+
+         nit : int
+             The number of iterations performed.
+         message : str
+             A string descriptor of the exit status of the optimization.
+     """
+     x = res['x']
+     fun = res['fun']
+     phase = res['phase']
+     status = res['status']
+     nit = res['nit']
+     message = res['message']
+     complete = res['complete']
+
+     saved_printoptions = np.get_printoptions()
+     np.set_printoptions(linewidth=500,
+                         formatter={'float': lambda x: f"{x: 12.4f}"})
+     if status:
+         print('--------- Simplex Early Exit -------\n')
+         print(f'The simplex method exited early with status {status:d}')
+         print(message)
+     elif complete:
+         print('--------- Simplex Complete --------\n')
+         print(f'Iterations required: {nit}')
+     else:
+         print(f'--------- Iteration {nit:d} ---------\n')
+
+     if nit > 0:
+         if phase == 1:
+             print('Current Pseudo-Objective Value:')
+         else:
+             print('Current Objective Value:')
+         print('f = ', fun)
+         print()
+         print('Current Solution Vector:')
+         print('x = ', x)
+         print()
+
+     np.set_printoptions(**saved_printoptions)
+
+
+ def linprog_terse_callback(res):
+     """
+     A sample callback function demonstrating the linprog callback interface.
+     This callback produces brief output to sys.stdout before each iteration
+     and after the final iteration of the simplex algorithm.
+
+     Parameters
+     ----------
+     res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+         x : 1-D array
+             The independent variable vector which optimizes the linear
+             programming problem.
+         fun : float
+             Value of the objective function.
+         success : bool
+             True if the algorithm succeeded in finding an optimal solution.
+         slack : 1-D array
+             The values of the slack variables. Each slack variable corresponds
+             to an inequality constraint. If the slack is zero, then the
+             corresponding constraint is active.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints, that
+             is, ``b - A_eq @ x``.
+         phase : int
+             The phase of the optimization being executed. In phase 1 a basic
+             feasible solution is sought and the tableau ``T`` has an
+             additional row representing an alternate objective function.
+         status : int
+             An integer representing the exit status of the optimization::
+
+                 0 : Optimization terminated successfully
+                 1 : Iteration limit reached
+                 2 : Problem appears to be infeasible
+                 3 : Problem appears to be unbounded
+                 4 : Serious numerical difficulties encountered
+
+         nit : int
+             The number of iterations performed.
+         message : str
+             A string descriptor of the exit status of the optimization.
+     """
+     nit = res['nit']
+     x = res['x']
+
+     if nit == 0:
+         print("Iter: X:")
+     print(f"{nit: <5d} ", end="")
+     print(x)
+
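+ # Illustrative usage sketch (editorial addition, not part of the original
+ # SciPy source): the callbacks above are passed to `linprog` through the
+ # `callback` keyword, which only the legacy methods support (the HiGHS
+ # methods do not, per the docstring below). Assuming a SciPy version in
+ # which `method='simplex'` is still available:
+ #
+ #     >>> from scipy.optimize import linprog
+ #     >>> c = [-1, 4]
+ #     >>> A_ub = [[-3, 1], [1, 2]]
+ #     >>> b_ub = [6, 4]
+ #     >>> res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='simplex',
+ #     ...               callback=linprog_terse_callback)  # one row per iteration
+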
+ def linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+             bounds=(0, None), method='highs', callback=None,
+             options=None, x0=None, integrality=None):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     - minimize ::
+
+         c @ x
+
+     - such that ::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None``. Other bounds can be
+     specified with ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable.
+         If a single tuple ``(min, max)`` is provided, then ``min`` and ``max``
+         will serve as bounds for all decision variables.
+         Use ``None`` to indicate that there is no bound. For instance, the
+         default bound ``(0, None)`` means that all decision variables are
+         non-negative, and the pair ``(None, None)`` means no bounds at all,
+         i.e., all variables are allowed to take any real value.
+     method : str, optional
+         The algorithm used to solve the standard form problem.
+         :ref:`'highs' <optimize.linprog-highs>` (default),
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
+         :ref:`'interior-point' <optimize.linprog-interior-point>` (legacy),
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>` (legacy),
+         and
+         :ref:`'simplex' <optimize.linprog-simplex>` (legacy) are supported.
+         The legacy methods are deprecated and will be removed in SciPy 1.11.0.
+     callback : callable, optional
+         If a callback function is provided, it will be called at least once per
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+         x : 1-D array
+             The current solution vector.
+         fun : float
+             The current value of the objective function ``c @ x``.
+         success : bool
+             ``True`` when the algorithm has completed successfully.
+         slack : 1-D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         phase : int
+             The phase of the algorithm being executed.
+         status : int
+             An integer representing the status of the algorithm.
+
+             ``0`` : Optimization proceeding nominally.
+
+             ``1`` : Iteration limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : Numerical difficulties encountered.
+
+         nit : int
+             The current iteration number.
+         message : str
+             A string descriptor of the algorithm status.
+
+         Callback functions are not currently supported by the HiGHS methods.
+
+     options : dict, optional
+         A dictionary of solver options. All methods accept the following
+         options:
+
+         maxiter : int
+             Maximum number of iterations to perform.
+             Default: see method-specific documentation.
+         disp : bool
+             Set to ``True`` to print convergence messages.
+             Default: ``False``.
+         presolve : bool
+             Set to ``False`` to disable automatic presolve.
+             Default: ``True``.
+
+         All methods except the HiGHS solvers also accept:
+
+         tol : float
+             A tolerance which determines when a residual is "close enough" to
+             zero to be considered exactly zero.
+         autoscale : bool
+             Set to ``True`` to automatically perform equilibration.
+             Consider using this option if the numerical values in the
+             constraints are separated by several orders of magnitude.
+             Default: ``False``.
+         rr : bool
+             Set to ``False`` to disable automatic redundancy removal.
+             Default: ``True``.
+         rr_method : string
+             Method used to identify and remove redundant rows from the
+             equality constraint matrix after presolve. For problems with
+             dense input, the available methods for redundancy removal are:
+
+             "SVD":
+                 Repeatedly performs singular value decomposition on
+                 the matrix, detecting redundant rows based on nonzeros
+                 in the left singular vectors that correspond with
+                 zero singular values. May be fast when the matrix is
+                 nearly full rank.
+             "pivot":
+                 Uses the algorithm presented in [5]_ to identify
+                 redundant rows.
+             "ID":
+                 Uses a randomized interpolative decomposition.
+                 Identifies columns of the matrix transpose not used in
+                 a full-rank interpolative decomposition of the matrix.
+             None:
+                 Uses "SVD" if the matrix is nearly full rank, that is,
+                 the difference between the matrix rank and the number
+                 of rows is less than five. If not, uses "pivot". The
+                 behavior of this default is subject to change without
+                 prior notice.
+
+             Default: None.
+             For problems with sparse input, this option is ignored, and the
+             pivot-based algorithm presented in [5]_ is used.
+
+         For method-specific options, see
+         :func:`show_options('linprog') <show_options>`.
+
+     x0 : 1-D array, optional
+         Guess values of the decision variables, which will be refined by
+         the optimization algorithm. This argument is currently used only by
+         the 'revised simplex' method, and can only be used if `x0` represents
+         a basic feasible solution.
+
+     integrality : 1-D array or int, optional
+         Indicates the type of integrality constraint on each decision
+         variable.
+
+         ``0`` : Continuous variable; no integrality constraint.
+
+         ``1`` : Integer variable; decision variable must be an integer
+         within `bounds`.
+
+         ``2`` : Semi-continuous variable; decision variable must be within
+         `bounds` or take value ``0``.
+
+         ``3`` : Semi-integer variable; decision variable must be an integer
+         within `bounds` or take value ``0``.
+
+         By default, all variables are continuous.
+
+         For mixed integrality constraints, supply an array of shape `c.shape`.
+         To infer a constraint on each decision variable from shorter inputs,
+         the argument will be broadcast to `c.shape` using `np.broadcast_to`.
+
+         This argument is currently used only by the ``'highs'`` method and
+         ignored otherwise.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields
+         below. Note that the return types of the fields may depend on whether
+         the optimization was successful, so it is recommended to check
+         `OptimizeResult.status` before relying on the other fields:
+
+         x : 1-D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1-D array
+             The (nominally positive) values of the slack variables,
+             ``b_ub - A_ub @ x``.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : Numerical difficulties encountered.
+
+         nit : int
+             The total number of iterations performed in all phases.
+         message : str
+             A string descriptor of the exit status of the algorithm.
+
+     See Also
+     --------
+     show_options : Additional options accepted by the solvers.
+
+     Notes
+     -----
+     This section describes the available solvers that can be selected by the
+     'method' parameter.
+
+     `'highs-ds'` and
+     `'highs-ipm'` are interfaces to the
+     HiGHS simplex and interior-point method solvers [13]_, respectively.
+     `'highs'` (default) chooses between
+     the two automatically. These are the fastest linear
+     programming solvers in SciPy, especially for large, sparse problems;
+     which of these two is faster is problem-dependent.
+     The other solvers (`'interior-point'`, `'revised simplex'`, and
+     `'simplex'`) are legacy methods and will be removed in SciPy 1.11.0.
+
+     Method *highs-ds* is a wrapper of the C++ high performance dual
+     revised simplex implementation (HSOL) [13]_, [14]_. Method *highs-ipm*
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method *highs* chooses between the two automatically.
+     For new code involving `linprog`, we recommend explicitly choosing one of
+     these three method values.
+
+     .. versionadded:: 1.6.0
+
+     Method *interior-point* uses the primal-dual path following algorithm
+     as outlined in [4]_. This algorithm supports sparse constraint matrices
+     and is typically faster than the simplex methods, especially for large,
+     sparse problems. Note, however, that the solution returned may be slightly
+     less accurate than those of the simplex methods and will not, in general,
+     correspond with a vertex of the polytope defined by the constraints.
+
+     .. versionadded:: 1.0.0
+
+     Method *revised simplex* uses the revised simplex method as described in
+     [9]_, except that a factorization [11]_ of the basis matrix, rather than
+     its inverse, is efficiently maintained and used to solve the linear
+     systems at each iteration of the algorithm.
+
+     .. versionadded:: 1.3.0
+
+     Method *simplex* uses a traditional, full-tableau implementation of
+     Dantzig's simplex algorithm [1]_, [2]_ (*not* the
+     Nelder-Mead simplex). This algorithm is included for backwards
+     compatibility and educational purposes.
+
+     .. versionadded:: 0.15.0
+
+     Before applying *interior-point*, *revised simplex*, or *simplex*,
+     a presolve procedure based on [8]_ attempts
+     to identify trivial infeasibilities, trivial unboundedness, and potential
+     problem simplifications. Specifically, it checks for:
+
+     - rows of zeros in ``A_eq`` or ``A_ub``, representing trivial constraints;
+     - columns of zeros in ``A_eq`` `and` ``A_ub``, representing unconstrained
+       variables;
+     - column singletons in ``A_eq``, representing fixed variables; and
+     - column singletons in ``A_ub``, representing simple bounds.
+
+     If presolve reveals that the problem is unbounded (e.g. an unconstrained
+     and unbounded variable has negative cost) or infeasible (e.g., a row of
+     zeros in ``A_eq`` corresponds with a nonzero in ``b_eq``), the solver
+     terminates with the appropriate status code. Note that presolve terminates
+     as soon as any sign of unboundedness is detected; consequently, a problem
+     may be reported as unbounded when in reality the problem is infeasible
+     (but infeasibility has not been detected yet). Therefore, if it is
+     important to know whether the problem is actually infeasible, solve the
+     problem again with option ``presolve=False``.
+
+     If neither infeasibility nor unboundedness are detected in a single pass
+     of the presolve, bounds are tightened where possible and fixed
+     variables are removed from the problem. Then, linearly dependent rows
+     of the ``A_eq`` matrix are removed (unless they represent an
+     infeasibility) to avoid numerical difficulties in the primary solve
+     routine. Note that rows that are nearly linearly dependent (within a
+     prescribed tolerance) may also be removed, which can change the optimal
+     solution in rare cases. If this is a concern, eliminate redundancy from
+     your problem formulation and run with option ``rr=False`` or
+     ``presolve=False``.
+
+     Several potential improvements can be made here: additional presolve
+     checks outlined in [8]_ should be implemented, the presolve routine should
+     be run multiple times (until no further simplifications can be made), and
+     more of the efficiency improvements from [5]_ should be implemented in the
+     redundancy removal routines.
+
+     After presolve, the problem is transformed to standard form by converting
+     the (tightened) simple bounds to upper bound constraints, introducing
+     non-negative slack variables for inequality constraints, and expressing
+     unbounded variables as the difference between two non-negative variables.
+     Optionally, the problem is automatically scaled via equilibration [12]_.
+     The selected algorithm solves the standard form problem, and a
+     postprocessing routine converts the result to a solution to the original
+     problem.
+
+     References
+     ----------
+     .. [1] Dantzig, George B., Linear programming and extensions. Rand
+            Corporation Research Study, Princeton Univ. Press, Princeton, NJ,
+            1963.
+     .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+            Mathematical Programming", McGraw-Hill, Chapter 4.
+     .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
+            Mathematics of Operations Research (2), 1977: pp. 103-107.
+     .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior
+            point optimizer for linear programming: an implementation of the
+            homogeneous algorithm." High performance optimization. Springer US,
+            2000. 197-232.
+     .. [5] Andersen, Erling D. "Finding all linearly dependent rows in
+            large-scale linear programming." Optimization Methods and Software
+            6.3 (1995): 219-227.
+     .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
+            Programming based on Newton's Method." Unpublished Course Notes,
+            March 2004. Available 2/25/2017 at
+            https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
+     .. [7] Fourer, Robert. "Solving Linear Programs by Interior-Point
+            Methods." Unpublished Course Notes, August 26, 2005. Available
+            2/25/2017 at http://www.4er.org/CourseNotes/Book%20B/B-III.pdf
+     .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
+            programming." Mathematical Programming 71.2 (1995): 221-245.
+     .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
+            programming." Athena Scientific 1 (1997): 997.
+     .. [10] Andersen, Erling D., et al. Implementation of interior point
+             methods for large scale linear programming. HEC/Universite de
+             Geneve, 1996.
+     .. [11] Bartels, Richard H. "A stabilization of the simplex method."
+             Numerische Mathematik 16.5 (1971): 414-434.
+     .. [12] Tomlin, J. A. "On scaling linear programming problems."
+             Mathematical Programming Study 4 (1975): 146-166.
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+
+     Examples
+     --------
+     Consider the following problem:
+
+     .. math::
+
+         \min_{x_0, x_1} \ -x_0 + 4x_1 & \\
+         \mbox{such that} \ -3x_0 + x_1 & \leq 6,\\
+         -x_0 - 2x_1 & \geq -4,\\
+         x_1 & \geq -3.
+
+     The problem is not presented in the form accepted by `linprog`. This is
+     easily remedied by converting the "greater than" inequality
+     constraint to a "less than" inequality constraint by
+     multiplying both sides by a factor of :math:`-1`. Note also that the last
+     constraint is really the simple bound :math:`-3 \leq x_1 \leq \infty`.
+     Finally, since there are no bounds on :math:`x_0`, we must explicitly
+     specify the bounds :math:`-\infty \leq x_0 \leq \infty`, as the
+     default is for variables to be non-negative. After collecting coefficients
+     into arrays and tuples, the input for this problem is:
+
+     >>> from scipy.optimize import linprog
+     >>> c = [-1, 4]
+     >>> A = [[-3, 1], [1, 2]]
+     >>> b = [6, 4]
+     >>> x0_bounds = (None, None)
+     >>> x1_bounds = (-3, None)
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds])
+     >>> res.fun
+     -22.0
+     >>> res.x
+     array([10., -3.])
+     >>> res.message
+     'Optimization terminated successfully. (HiGHS Status 7: Optimal)'
+
+     The marginals (AKA dual values / shadow prices / Lagrange multipliers)
+     and residuals (slacks) are also available.
+
+     >>> res.ineqlin
+       residual: [ 3.900e+01  0.000e+00]
+      marginals: [-0.000e+00 -1.000e+00]
+
+     For example, because the marginal associated with the second inequality
+     constraint is -1, we expect the optimal value of the objective function
+     to decrease by ``eps`` if we add a small amount ``eps`` to the right hand
+     side of the second inequality constraint:
+
+     >>> eps = 0.05
+     >>> b[1] += eps
+     >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
+     -22.05
+
+     Also, because the residual on the first inequality constraint is 39, we
+     can decrease the right hand side of the first constraint by 39 without
+     affecting the optimal solution.
+
+     >>> b = [6, 4]  # reset to original values
+     >>> b[0] -= 39
+     >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
+     -22.0
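+
+     A minimal sketch of the ``integrality`` parameter (an editorial addition,
+     assuming a SciPy version in which the ``'highs'`` method supports
+     integrality constraints, i.e. 1.9 or later). Requiring both decision
+     variables to be integers is a one-line change; outputs are omitted
+     because they may vary with the solver version:
+
+     >>> b = [6, 4]  # reset to original values
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds],
+     ...               integrality=[1, 1])  # both variables integer-valued
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds],
+     ...               integrality=1)       # scalar is broadcast to c.shape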
+
+     """
+
+     meth = method.lower()
+     methods = {"highs", "highs-ds", "highs-ipm",
+                "simplex", "revised simplex", "interior-point"}
+
+     if meth not in methods:
+         raise ValueError(f"Unknown solver '{method}'")
+
+     if x0 is not None and meth != "revised simplex":
+         warning_message = "x0 is used only when method is 'revised simplex'. "
+         warn(warning_message, OptimizeWarning, stacklevel=2)
+
+     if np.any(integrality) and not meth == "highs":
+         integrality = None
+         warning_message = ("Only `method='highs'` supports integer "
+                            "constraints. Ignoring `integrality`.")
+         warn(warning_message, OptimizeWarning, stacklevel=2)
+     elif np.any(integrality):
+         integrality = np.broadcast_to(integrality, np.shape(c))
+     else:
+         integrality = None
+
+     lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality)
+     lp, solver_options = _parse_linprog(lp, options, meth)
+     tol = solver_options.get('tol', 1e-9)
+
+     # Give unmodified problem to HiGHS
+     if meth.startswith('highs'):
+         if callback is not None:
+             raise NotImplementedError("HiGHS solvers do not support the "
+                                       "callback interface.")
+         highs_solvers = {'highs-ipm': 'ipm', 'highs-ds': 'simplex',
+                          'highs': None}
+
+         sol = _linprog_highs(lp, solver=highs_solvers[meth],
+                              **solver_options)
+         sol['status'], sol['message'] = (
+             _check_result(sol['x'], sol['fun'], sol['status'], sol['slack'],
+                           sol['con'], lp.bounds, tol, sol['message'],
+                           integrality))
+         sol['success'] = sol['status'] == 0
+         return OptimizeResult(sol)
+
+     warn(f"`method='{meth}'` is deprecated and will be removed in SciPy "
+          "1.11.0. Please use one of the HiGHS solvers (e.g. "
+          "`method='highs'`) in new code.", DeprecationWarning, stacklevel=2)
+
+     iteration = 0
+     complete = False  # will become True if solved in presolve
+     undo = []
+
+     # Keep the original arrays to calculate slack/residuals for original
+     # problem.
+     lp_o = deepcopy(lp)
+
+     # Solve trivial problem, eliminate variables, tighten bounds, etc.
+     rr_method = solver_options.pop('rr_method', None)  # need to pop these;
+     rr = solver_options.pop('rr', True)  # they're not passed to methods
+     c0 = 0  # we might get a constant term in the objective
+     if solver_options.pop('presolve', True):
+         (lp, c0, x, undo, complete, status, message) = _presolve(lp, rr,
+                                                                  rr_method,
+                                                                  tol)
+
+     C, b_scale = 1, 1  # for trivial unscaling if autoscale is not used
+     postsolve_args = (lp_o._replace(bounds=lp.bounds), undo, C, b_scale)
+
+     if not complete:
+         A, b, c, c0, x0 = _get_Abc(lp, c0)
+         if solver_options.pop('autoscale', False):
+             A, b, c, x0, C, b_scale = _autoscale(A, b, c, x0)
+             postsolve_args = postsolve_args[:-2] + (C, b_scale)
+
+         if meth == 'simplex':
+             x, status, message, iteration = _linprog_simplex(
+                 c, c0=c0, A=A, b=b, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+         elif meth == 'interior-point':
+             x, status, message, iteration = _linprog_ip(
+                 c, c0=c0, A=A, b=b, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+         elif meth == 'revised simplex':
+             x, status, message, iteration = _linprog_rs(
+                 c, c0=c0, A=A, b=b, x0=x0, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+
+     # Eliminate artificial variables, re-introduce presolved variables, etc.
+     disp = solver_options.get('disp', False)
+
+     x, fun, slack, con = _postsolve(x, postsolve_args, complete)
+
+     status, message = _check_result(x, fun, status, slack, con, lp_o.bounds,
+                                     tol, message, integrality)
+
+     if disp:
+         _display_summary(message, status, fun, iteration)
+
+     sol = {
+         'x': x,
+         'fun': fun,
+         'slack': slack,
+         'con': con,
+         'status': status,
+         'message': message,
+         'nit': iteration,
+         'success': status == 0}
+
+     return OptimizeResult(sol)
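+
+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # the legacy code path above pops `presolve`, `rr`, `rr_method`, and
+ # `autoscale` out of `options` before dispatching, so those switches are
+ # supplied as plain dictionary entries. Assuming a SciPy version in which
+ # `method='interior-point'` still exists:
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='interior-point',
+ #                   options={'presolve': False, 'rr': False,
+ #                            'autoscale': True, 'tol': 1e-9})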
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py ADDED
@@ -0,0 +1,1434 @@
+ """
+ Created on Sat Aug 22 19:49:17 2020
+
+ @author: matth
+ """
+
+
+ def _linprog_highs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+                        bounds=None, method='highs', callback=None,
+                        maxiter=None, disp=False, presolve=True,
+                        time_limit=None,
+                        dual_feasibility_tolerance=None,
+                        primal_feasibility_tolerance=None,
+                        ipm_optimality_tolerance=None,
+                        simplex_dual_edge_weight_strategy=None,
+                        mip_rel_gap=None,
+                        **unknown_options):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints using one of the HiGHS solvers.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     minimize::
+
+         c @ x
+
+     such that::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None`` unless specified with
+     ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable. Use ``None``
+         to indicate that there is no bound. By default, bounds are
+         ``(0, None)`` (all decision variables are non-negative).
+         If a single tuple ``(min, max)`` is provided, then ``min`` and
+         ``max`` will serve as bounds for all decision variables.
+     method : str
+
+         This is the method-specific documentation for 'highs', which chooses
+         automatically between
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>` and
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`. The legacy methods
+         :ref:`'interior-point' <optimize.linprog-interior-point>`,
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+         :ref:`'simplex' <optimize.linprog-simplex>`
+         are also available.
+     integrality : 1-D array or int, optional
+         Indicates the type of integrality constraint on each decision
+         variable.
+
+         ``0`` : Continuous variable; no integrality constraint.
+
+         ``1`` : Integer variable; decision variable must be an integer
+         within `bounds`.
+
+         ``2`` : Semi-continuous variable; decision variable must be within
+         `bounds` or take value ``0``.
+
+         ``3`` : Semi-integer variable; decision variable must be an integer
+         within `bounds` or take value ``0``.
+
+         By default, all variables are continuous.
+
+         For mixed integrality constraints, supply an array of shape `c.shape`.
+         To infer a constraint on each decision variable from shorter inputs,
+         the argument will be broadcast to `c.shape` using `np.broadcast_to`.
+
+         This argument is currently used only by the ``'highs'`` method and
+         ignored otherwise.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+         For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
+         include the number of crossover iterations. Default is the largest
+         possible value for an ``int`` on the platform.
+     disp : bool (default: ``False``)
+         Set to ``True`` if indicators of optimization status are to be
+         printed to the console during optimization.
+     presolve : bool (default: ``True``)
+         Presolve attempts to identify trivial infeasibilities,
+         identify trivial unboundedness, and simplify the problem before
+         sending it to the main solver. It is generally recommended
+         to keep the default setting ``True``; set to ``False`` if
+         presolve is to be disabled.
+     time_limit : float
+         The maximum time in seconds allotted to solve the problem;
+         default is the largest possible value for a ``double`` on the
+         platform.
+     dual_feasibility_tolerance : double (default: 1e-07)
+         Dual feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+         The minimum of this and ``primal_feasibility_tolerance``
+         is used for the feasibility tolerance of
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+     primal_feasibility_tolerance : double (default: 1e-07)
+         Primal feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+         The minimum of this and ``dual_feasibility_tolerance``
+         is used for the feasibility tolerance of
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+     ipm_optimality_tolerance : double (default: ``1e-08``)
+         Optimality tolerance for
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+         Minimum allowable value is 1e-12.
+     simplex_dual_edge_weight_strategy : str (default: None)
+         Strategy for simplex dual edge weights. The default, ``None``,
+         automatically selects one of the following.
+
+         ``'dantzig'`` uses Dantzig's original strategy of choosing the most
+         negative reduced cost.
+
+         ``'devex'`` uses the strategy described in [15]_.
+
+         ``'steepest'`` uses the exact steepest edge strategy as described in
+         [16]_.
+
+         ``'steepest-devex'`` begins with the exact steepest edge strategy
+         until the computation is too costly or inexact and then switches to
+         the devex method.
+
+         Currently, ``None`` always selects ``'steepest-devex'``, but this
+         may change as new options become available.
+     mip_rel_gap : double (default: None)
+         Termination criterion for the MIP solver: the solver will terminate
+         when the gap between the primal objective value and the dual
+         objective bound, scaled by the primal objective value, is
+         <= mip_rel_gap.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         ``unknown_options`` is non-empty, a warning is issued listing
+         all unused options.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
+
+         x : 1D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration or time limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : The HiGHS solver ran into a problem.
+
+         message : str
+             A string descriptor of the exit status of the algorithm.
+         nit : int
+             The total number of iterations performed.
+             For the HiGHS simplex method, this includes iterations in all
+             phases. For the HiGHS interior-point method, this does not
+             include crossover iterations.
+         crossover_nit : int
+             The number of primal/dual pushes performed during the
+             crossover routine for the HiGHS interior-point method.
+             This is ``0`` for the HiGHS simplex method.
+         ineqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             inequality constraints, `b_ub`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally positive) values of the slack variables,
+                 ``b_ub - A_ub @ x``. This quantity is also commonly
+                 referred to as "slack".
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 inequality constraints, `b_ub`.
+
+         eqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             equality constraints, `b_eq`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally zero) residuals of the equality constraints,
+                 ``b_eq - A_eq @ x``.
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 equality constraints, `b_eq`.
+
+         lower, upper : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             lower and upper bounds on decision variables, `bounds`.
+
+             residual : np.ndarray
+                 The (nominally positive) values of the quantity
+                 ``x - lb`` (lower) or ``ub - x`` (upper).
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the lower and upper
+                 `bounds`.
+
+     Notes
+     -----
+
+     Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
+     of the C++ high performance dual revised simplex implementation (HSOL)
+     [13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>`
+     chooses between the two automatically. For new code involving `linprog`,
+     we recommend explicitly choosing one of these three method values instead
+     of the legacy methods
+     :ref:`'interior-point' <optimize.linprog-interior-point>`,
+     :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+     :ref:`'simplex' <optimize.linprog-simplex>`.
+
+     The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
+     `marginals`, or partial derivatives of the objective function with
+     respect to the right-hand side of each constraint. These partial
+     derivatives are also referred to as "Lagrange multipliers", "dual
+     values", and "shadow prices". The sign convention of `marginals` is
+     opposite that of Lagrange multipliers produced by many nonlinear solvers.
+
+     References
+     ----------
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+     .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
+             Mathematical Programming 5.1 (1973): 1-28.
+     .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
+             simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
+     """
+     pass
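+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # the options documented above are passed to `linprog` through its
+ # `options` dict when `method='highs'` is selected, e.g.
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs',
+ #                   options={'time_limit': 10.0, 'presolve': True,
+ #                            'mip_rel_gap': 1e-4})  # gap only matters with
+ #                                                   # integrality constraints
+ #     res.ineqlin.marginals  # shadow prices of the inequality constraints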
+
+
+ def _linprog_highs_ds_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+                           bounds=None, method='highs-ds', callback=None,
+                           maxiter=None, disp=False, presolve=True,
+                           time_limit=None,
+                           dual_feasibility_tolerance=None,
+                           primal_feasibility_tolerance=None,
+                           simplex_dual_edge_weight_strategy=None,
+                           **unknown_options):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints using the HiGHS dual simplex solver.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     minimize::
+
+         c @ x
+
+     such that::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None`` unless specified with
+     ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable. Use ``None``
+         to indicate that there is no bound. By default, bounds are
+         ``(0, None)`` (all decision variables are non-negative).
+         If a single tuple ``(min, max)`` is provided, then ``min`` and
+         ``max`` will serve as bounds for all decision variables.
+     method : str
+
+         This is the method-specific documentation for 'highs-ds'.
+         :ref:`'highs' <optimize.linprog-highs>`,
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, and the legacy
+         methods :ref:`'interior-point' <optimize.linprog-interior-point>`,
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+         :ref:`'simplex' <optimize.linprog-simplex>`
+         are also available.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+         Default is the largest possible value for an ``int`` on the platform.
+     disp : bool (default: ``False``)
+         Set to ``True`` if indicators of optimization status are to be
+         printed to the console during optimization.
+     presolve : bool (default: ``True``)
+         Presolve attempts to identify trivial infeasibilities,
+         identify trivial unboundedness, and simplify the problem before
+         sending it to the main solver. It is generally recommended
+         to keep the default setting ``True``; set to ``False`` if
+         presolve is to be disabled.
+     time_limit : float
+         The maximum time in seconds allotted to solve the problem;
+         default is the largest possible value for a ``double`` on the
+         platform.
+     dual_feasibility_tolerance : double (default: 1e-07)
+         Dual feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+     primal_feasibility_tolerance : double (default: 1e-07)
+         Primal feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+     simplex_dual_edge_weight_strategy : str (default: None)
+         Strategy for simplex dual edge weights. The default, ``None``,
+         automatically selects one of the following.
+
+         ``'dantzig'`` uses Dantzig's original strategy of choosing the most
+         negative reduced cost.
+
+         ``'devex'`` uses the strategy described in [15]_.
+
+         ``'steepest'`` uses the exact steepest edge strategy as described in
+         [16]_.
+
+         ``'steepest-devex'`` begins with the exact steepest edge strategy
+         until the computation is too costly or inexact and then switches to
+         the devex method.
+
+         Currently, ``None`` always selects ``'steepest-devex'``, but this
+         may change as new options become available.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         ``unknown_options`` is non-empty, a warning is issued listing
+         all unused options.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
+
+         x : 1D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration or time limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : The HiGHS solver ran into a problem.
+
+         message : str
+             A string descriptor of the exit status of the algorithm.
+         nit : int
+             The total number of iterations performed. This includes
+             iterations in all phases.
+         crossover_nit : int
+             This is always ``0`` for the HiGHS simplex method.
+             For the HiGHS interior-point method, this is the number of
+             primal/dual pushes performed during the crossover routine.
+         ineqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             inequality constraints, `b_ub`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally positive) values of the slack variables,
+                 ``b_ub - A_ub @ x``. This quantity is also commonly
+                 referred to as "slack".
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 inequality constraints, `b_ub`.
+
+         eqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             equality constraints, `b_eq`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally zero) residuals of the equality constraints,
+                 ``b_eq - A_eq @ x``.
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 equality constraints, `b_eq`.
+
+         lower, upper : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             lower and upper bounds on decision variables, `bounds`.
+
+             residual : np.ndarray
+                 The (nominally positive) values of the quantity
+                 ``x - lb`` (lower) or ``ub - x`` (upper).
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the lower and upper
+                 `bounds`.
+
+     Notes
+     -----
+
+     Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
+     of the C++ high performance dual revised simplex implementation (HSOL)
+     [13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>`
+     chooses between the two automatically. For new code involving `linprog`,
+     we recommend explicitly choosing one of these three method values instead
+     of the legacy methods
+     :ref:`'interior-point' <optimize.linprog-interior-point>`,
+     :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+     :ref:`'simplex' <optimize.linprog-simplex>`.
+
+     The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
+     `marginals`, or partial derivatives of the objective function with
+     respect to the right-hand side of each constraint. These partial
+     derivatives are also referred to as "Lagrange multipliers", "dual
+     values", and "shadow prices". The sign convention of `marginals` is
+     opposite that of Lagrange multipliers produced by many nonlinear solvers.
+
+     References
+     ----------
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+     .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
+             Mathematical Programming 5.1 (1973): 1-28.
+     .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
+             simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
+     """
+     pass
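+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # selecting the dual simplex solver directly, with one of the edge-weight
+ # strategies documented above:
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs-ds',
+ #                   options={'simplex_dual_edge_weight_strategy': 'devex'})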
531
+
532
+
533
+ def _linprog_highs_ipm_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
534
+ bounds=None, method='highs-ipm', callback=None,
535
+ maxiter=None, disp=False, presolve=True,
536
+ time_limit=None,
537
+ dual_feasibility_tolerance=None,
538
+ primal_feasibility_tolerance=None,
539
+ ipm_optimality_tolerance=None,
540
+ **unknown_options):
541
+ r"""
542
+ Linear programming: minimize a linear objective function subject to linear
543
+ equality and inequality constraints using the HiGHS interior point solver.
544
+
545
+ Linear programming solves problems of the following form:
546
+
547
+ .. math::
548
+
549
+ \min_x \ & c^T x \\
550
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
551
+ & A_{eq} x = b_{eq},\\
552
+ & l \leq x \leq u ,
553
+
554
+ where :math:`x` is a vector of decision variables; :math:`c`,
555
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
556
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
557
+
558
+ Alternatively, that's:
559
+
560
+ minimize::
561
+
562
+ c @ x
563
+
564
+ such that::
565
+
566
+ A_ub @ x <= b_ub
567
+ A_eq @ x == b_eq
568
+ lb <= x <= ub
569
+
570
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
571
+ ``bounds``.
572
+
573
+ Parameters
574
+ ----------
575
+ c : 1-D array
576
+ The coefficients of the linear objective function to be minimized.
577
+ A_ub : 2-D array, optional
578
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
579
+ coefficients of a linear inequality constraint on ``x``.
580
+ b_ub : 1-D array, optional
581
+ The inequality constraint vector. Each element represents an
582
+ upper bound on the corresponding value of ``A_ub @ x``.
583
+ A_eq : 2-D array, optional
584
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
585
+ coefficients of a linear equality constraint on ``x``.
586
+ b_eq : 1-D array, optional
587
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
588
+ the corresponding element of ``b_eq``.
589
+ bounds : sequence, optional
590
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
591
+ the minimum and maximum values of that decision variable. Use ``None``
592
+ to indicate that there is no bound. By default, bounds are
593
+ ``(0, None)`` (all decision variables are non-negative).
594
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
595
+ ``max`` will serve as bounds for all decision variables.
596
+ method : str
597
+
598
+ This is the method-specific documentation for 'highs-ipm'.
599
+ :ref:`'highs-ipm' <optimize.linprog-highs>`,
600
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
601
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
602
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
603
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
604
+ are also available.
605
+
606
+ Options
607
+ -------
608
+ maxiter : int
609
+ The maximum number of iterations to perform in either phase.
610
+ For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
611
+ include the number of crossover iterations. Default is the largest
612
+ possible value for an ``int`` on the platform.
613
+ disp : bool (default: ``False``)
614
+ Set to ``True`` if indicators of optimization status are to be
615
+ printed to the console during optimization.
616
+ presolve : bool (default: ``True``)
617
+ Presolve attempts to identify trivial infeasibilities,
618
+ identify trivial unboundedness, and simplify the problem before
619
+ sending it to the main solver. It is generally recommended
620
+ to keep the default setting ``True``; set to ``False`` if
621
+ presolve is to be disabled.
622
+ time_limit : float
623
+ The maximum time in seconds allotted to solve the problem;
624
+ default is the largest possible value for a ``double`` on the
625
+ platform.
626
+ dual_feasibility_tolerance : double (default: 1e-07)
627
+ The minimum of this and ``primal_feasibility_tolerance``
628
+ is used for the feasibility tolerance of
629
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
630
+ primal_feasibility_tolerance : double (default: 1e-07)
631
+ The minimum of this and ``dual_feasibility_tolerance``
632
+ is used for the feasibility tolerance of
633
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
634
+ ipm_optimality_tolerance : double (default: ``1e-08``)
635
+ Optimality tolerance for
636
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
637
+ Minimum allowable value is 1e-12.
638
+ unknown_options : dict
639
+ Optional arguments not used by this particular solver. If
640
+ ``unknown_options`` is non-empty, a warning is issued listing
641
+ all unused options.
642
+
643
+ Returns
644
+ -------
645
+ res : OptimizeResult
646
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
647
+
648
+ x : 1D array
649
+ The values of the decision variables that minimizes the
650
+ objective function while satisfying the constraints.
651
+ fun : float
652
+ The optimal value of the objective function ``c @ x``.
653
+ slack : 1D array
654
+ The (nominally positive) values of the slack,
655
+ ``b_ub - A_ub @ x``.
656
+ con : 1D array
657
+ The (nominally zero) residuals of the equality constraints,
658
+ ``b_eq - A_eq @ x``.
659
+ success : bool
660
+ ``True`` when the algorithm succeeds in finding an optimal
661
+ solution.
662
+ status : int
663
+ An integer representing the exit status of the algorithm.
664
+
665
+ ``0`` : Optimization terminated successfully.
666
+
667
+ ``1`` : Iteration or time limit reached.
668
+
669
+ ``2`` : Problem appears to be infeasible.
670
+
671
+ ``3`` : Problem appears to be unbounded.
672
+
673
+ ``4`` : The HiGHS solver ran into a problem.
674
+
675
+ message : str
676
+ A string descriptor of the exit status of the algorithm.
677
+ nit : int
678
+ The total number of iterations performed.
679
+ For the HiGHS interior-point method, this does not include
680
+ crossover iterations.
681
+ crossover_nit : int
682
+ The number of primal/dual pushes performed during the
683
+ crossover routine for the HiGHS interior-point method.
684
+ ineqlin : OptimizeResult
685
+ Solution and sensitivity information corresponding to the
686
+ inequality constraints, `b_ub`. A dictionary consisting of the
687
+ fields:
688
+
689
+ residual : np.ndnarray
690
+ The (nominally positive) values of the slack variables,
691
+ ``b_ub - A_ub @ x``. This quantity is also commonly
692
+ referred to as "slack".
693
+
694
+ marginals : np.ndarray
695
+ The sensitivity (partial derivative) of the objective
696
+ function with respect to the right-hand side of the
697
+ inequality constraints, `b_ub`.
698
+
699
+ eqlin : OptimizeResult
700
+ Solution and sensitivity information corresponding to the
701
+ equality constraints, `b_eq`. A dictionary consisting of the
702
+ fields:
703
+
704
+ residual : np.ndarray
705
+ The (nominally zero) residuals of the equality constraints,
706
+ ``b_eq - A_eq @ x``.
707
+
708
+ marginals : np.ndarray
709
+ The sensitivity (partial derivative) of the objective
710
+ function with respect to the right-hand side of the
711
+ equality constraints, `b_eq`.
712
+
713
+ lower, upper : OptimizeResult
714
+ Solution and sensitivity information corresponding to the
715
+ lower and upper bounds on decision variables, `bounds`.
716
+
717
+ residual : np.ndarray
718
+ The (nominally positive) values of the quantity
719
+ ``x - lb`` (lower) or ``ub - x`` (upper).
720
+
721
+ marginals : np.ndarray
722
+ The sensitivity (partial derivative) of the objective
723
+ function with respect to the lower and upper
724
+ `bounds`.
725
+
726
+ Notes
727
+ -----
728
+
729
+ Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
730
+ is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
731
+ **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
732
+ as a simplex solver.
733
+ Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
734
+ of the C++ high performance dual revised simplex implementation (HSOL)
735
+ [13]_, [14]_. Method :ref:`'highs' <optimize.linprog-highs>` chooses
736
+ between the two automatically. For new code involving `linprog`, we
737
+ recommend explicitly choosing one of these three method values instead of
738
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
739
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
740
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy).
741
+
742
+ The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
743
+ `marginals`, or partial derivatives of the objective function with respect
744
+ to the right-hand side of each constraint. These partial derivatives are
745
+ also referred to as "Lagrange multipliers", "dual values", and
746
+ "shadow prices". The sign convention of `marginals` is opposite that
747
+ of Lagrange multipliers produced by many nonlinear solvers.
748
+
749
+ References
750
+ ----------
751
+ .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
752
+ "HiGHS - high performance software for linear optimization."
753
+ https://highs.dev/
754
+ .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
755
+ simplex method." Mathematical Programming Computation, 10 (1),
756
+ 119-142, 2018. DOI: 10.1007/s12532-017-0130-5
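+
+ Examples
+ --------
+ A minimal sketch of reading the marginals described above; the problem
+ data are illustrative, not from the original documentation.
+
+ >>> import numpy as np
+ >>> from scipy.optimize import linprog
+ >>> c = np.array([-1, 4])
+ >>> A_ub = np.array([[-3, 1], [1, 2]])
+ >>> b_ub = np.array([6, 4])
+ >>> res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs')
+ >>> shadow_prices = res.ineqlin.marginals # d(fun)/d(b_ub); sign per Notes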
757
+ """
758
+ pass
759
+
760
+
761
+ def _linprog_ip_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
762
+ bounds=None, method='interior-point', callback=None,
763
+ maxiter=1000, disp=False, presolve=True,
764
+ tol=1e-8, autoscale=False, rr=True,
765
+ alpha0=.99995, beta=0.1, sparse=False,
766
+ lstsq=False, sym_pos=True, cholesky=True, pc=True,
767
+ ip=False, permc_spec='MMD_AT_PLUS_A', **unknown_options):
768
+ r"""
769
+ Linear programming: minimize a linear objective function subject to linear
770
+ equality and inequality constraints using the interior-point method of
771
+ [4]_.
772
+
773
+ .. deprecated:: 1.9.0
774
+ `method='interior-point'` will be removed in SciPy 1.11.0.
775
+ It is replaced by `method='highs'` because the latter is
776
+ faster and more robust.
777
+
778
+ Linear programming solves problems of the following form:
779
+
780
+ .. math::
781
+
782
+ \min_x \ & c^T x \\
783
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
784
+ & A_{eq} x = b_{eq},\\
785
+ & l \leq x \leq u ,
786
+
787
+ where :math:`x` is a vector of decision variables; :math:`c`,
788
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
789
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
790
+
791
+ Alternatively, that's:
792
+
793
+ minimize::
794
+
795
+ c @ x
796
+
797
+ such that::
798
+
799
+ A_ub @ x <= b_ub
800
+ A_eq @ x == b_eq
801
+ lb <= x <= ub
802
+
803
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
804
+ ``bounds``.
805
+
806
+ Parameters
807
+ ----------
808
+ c : 1-D array
809
+ The coefficients of the linear objective function to be minimized.
810
+ A_ub : 2-D array, optional
811
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
812
+ coefficients of a linear inequality constraint on ``x``.
813
+ b_ub : 1-D array, optional
814
+ The inequality constraint vector. Each element represents an
815
+ upper bound on the corresponding value of ``A_ub @ x``.
816
+ A_eq : 2-D array, optional
817
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
818
+ coefficients of a linear equality constraint on ``x``.
819
+ b_eq : 1-D array, optional
820
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
821
+ the corresponding element of ``b_eq``.
822
+ bounds : sequence, optional
823
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
824
+ the minimum and maximum values of that decision variable. Use ``None``
825
+ to indicate that there is no bound. By default, bounds are
826
+ ``(0, None)`` (all decision variables are non-negative).
827
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
828
+ ``max`` will serve as bounds for all decision variables.
829
+ method : str
830
+ This is the method-specific documentation for 'interior-point'.
831
+ :ref:`'highs' <optimize.linprog-highs>`,
832
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
833
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
834
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
835
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
836
+ are also available.
837
+ callback : callable, optional
838
+ Callback function to be executed once per iteration.
839
+
840
+ Options
841
+ -------
842
+ maxiter : int (default: 1000)
843
+ The maximum number of iterations of the algorithm.
844
+ disp : bool (default: False)
845
+ Set to ``True`` if indicators of optimization status are to be printed
846
+ to the console each iteration.
847
+ presolve : bool (default: True)
848
+ Presolve attempts to identify trivial infeasibilities,
849
+ identify trivial unboundedness, and simplify the problem before
850
+ sending it to the main solver. It is generally recommended
851
+ to keep the default setting ``True``; set to ``False`` if
852
+ presolve is to be disabled.
853
+ tol : float (default: 1e-8)
854
+ Termination tolerance to be used for all termination criteria;
855
+ see [4]_ Section 4.5.
856
+ autoscale : bool (default: False)
857
+ Set to ``True`` to automatically perform equilibration.
858
+ Consider using this option if the numerical values in the
859
+ constraints are separated by several orders of magnitude.
860
+ rr : bool (default: True)
861
+ Set to ``False`` to disable automatic redundancy removal.
862
+ alpha0 : float (default: 0.99995)
863
+ The maximal step size for Mehrotra's predictor-corrector search
864
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
865
+ beta : float (default: 0.1)
866
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
867
+ when Mehrotra's predictor-corrector is not in use (uncommon).
868
+ sparse : bool (default: False)
869
+ Set to ``True`` if the problem is to be treated as sparse after
870
+ presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
871
+ this option will automatically be set ``True``, and the problem
872
+ will be treated as sparse even during presolve. If your constraint
873
+ matrices contain mostly zeros and the problem is not very small (less
874
+ than about 100 constraints or variables), consider setting ``True``
875
+ or providing ``A_eq`` and ``A_ub`` as sparse matrices.
876
+ lstsq : bool (default: ``False``)
877
+ Set to ``True`` if the problem is expected to be very poorly
878
+ conditioned. This should always be left ``False`` unless severe
879
+ numerical difficulties are encountered. Leave this at the default
880
+ unless you receive a warning message suggesting otherwise.
881
+ sym_pos : bool (default: True)
882
+ Leave ``True`` if the problem is expected to yield a well conditioned
883
+ symmetric positive definite normal equation matrix
884
+ (almost always). Leave this at the default unless you receive
885
+ a warning message suggesting otherwise.
886
+ cholesky : bool (default: True)
887
+ Set to ``True`` if the normal equations are to be solved by explicit
888
+ Cholesky decomposition followed by explicit forward/backward
889
+ substitution. This is typically faster for problems
890
+ that are numerically well-behaved.
891
+ pc : bool (default: True)
892
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
893
+ used. This is almost always (if not always) beneficial.
894
+ ip : bool (default: False)
895
+ Set to ``True`` if the improved initial point suggestion due to [4]_
896
+ Section 4.3 is desired. Whether this is beneficial or not
897
+ depends on the problem.
898
+ permc_spec : str (default: 'MMD_AT_PLUS_A')
899
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
900
+ True``, and no SuiteSparse.)
901
+ A matrix is factorized in each iteration of the algorithm.
902
+ This option specifies how to permute the columns of the matrix for
903
+ sparsity preservation. Acceptable values are:
904
+
905
+ - ``NATURAL``: natural ordering.
906
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
907
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
908
+ - ``COLAMD``: approximate minimum degree column ordering.
909
+
910
+ This option can impact the convergence of the
911
+ interior point algorithm; test different values to determine which
912
+ performs best for your problem. For more information, refer to
913
+ ``scipy.sparse.linalg.splu``.
914
+ unknown_options : dict
915
+ Optional arguments not used by this particular solver. If
916
+ `unknown_options` is non-empty a warning is issued listing all
917
+ unused options.
918
+
919
+ Returns
920
+ -------
921
+ res : OptimizeResult
922
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
923
+
924
+ x : 1-D array
925
+ The values of the decision variables that minimize the
926
+ objective function while satisfying the constraints.
927
+ fun : float
928
+ The optimal value of the objective function ``c @ x``.
929
+ slack : 1-D array
930
+ The (nominally positive) values of the slack variables,
931
+ ``b_ub - A_ub @ x``.
932
+ con : 1-D array
933
+ The (nominally zero) residuals of the equality constraints,
934
+ ``b_eq - A_eq @ x``.
935
+ success : bool
936
+ ``True`` when the algorithm succeeds in finding an optimal
937
+ solution.
938
+ status : int
939
+ An integer representing the exit status of the algorithm.
940
+
941
+ ``0`` : Optimization terminated successfully.
942
+
943
+ ``1`` : Iteration limit reached.
944
+
945
+ ``2`` : Problem appears to be infeasible.
946
+
947
+ ``3`` : Problem appears to be unbounded.
948
+
949
+ ``4`` : Numerical difficulties encountered.
950
+
951
+ message : str
952
+ A string descriptor of the exit status of the algorithm.
953
+ nit : int
954
+ The total number of iterations performed in all phases.
955
+
956
+
957
+ Notes
958
+ -----
959
+ This method implements the algorithm outlined in [4]_ with ideas from [8]_
960
+ and a structure inspired by the simpler methods of [6]_.
961
+
962
+ The primal-dual path following method begins with initial 'guesses' of
963
+ the primal and dual variables of the standard form problem and iteratively
964
+ attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
965
+ problem with a gradually reduced logarithmic barrier term added to the
966
+ objective. This particular implementation uses a homogeneous self-dual
967
+ formulation, which provides certificates of infeasibility or unboundedness
968
+ where applicable.
969
+
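+ As a sketch (using the notation of [4]_ Equation 8.8, which matches the
+ residuals computed in ``_linprog_ip.py`` later in this diff), each
+ iteration drives the following quantities toward zero:
+
+ .. math::
+
+ r_P = b \tau - A x, \quad
+ r_D = c \tau - A^T y - z, \quad
+ r_G = c^T x - b^T y + \kappa,
+
+ with complementarity measure :math:`\mu = (x^T z + \tau \kappa)/(n + 1)`.
+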
970
+ The default initial point for the primal and dual variables is that
971
+ defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
972
+ point option ``ip=True``), an alternate (potentially improved) starting
973
+ point can be calculated according to the additional recommendations of
974
+ [4]_ Section 4.4.
975
+
976
+ A search direction is calculated using the predictor-corrector method
977
+ (single correction) proposed by Mehrotra and detailed in [4]_ Section 4.1.
978
+ (A potential improvement would be to implement the method of multiple
979
+ corrections described in [4]_ Section 4.2.) In practice, this is
980
+ accomplished by solving the normal equations, [4]_ Section 5.1 Equations
981
+ 8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
982
+ 8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
983
+ solving the normal equations rather than 8.25 directly is that the
984
+ matrices involved are symmetric positive definite, so Cholesky
985
+ decomposition can be used rather than the more expensive LU factorization.
986
+
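+ Concretely, a sketch mirroring ``_get_delta`` and ``_get_solver`` in
+ ``_linprog_ip.py`` (shown later in this diff); ``rhs`` stands in for the
+ assembled right-hand side::
+
+ Dinv = x / z # diagonal of D^{-1} at the current iterate
+ M = A @ (Dinv.reshape(-1, 1) * A.T) # normal-equations matrix (SPD)
+ L = scipy.linalg.cho_factor(M) # Cholesky factors
+ step = scipy.linalg.cho_solve(L, rhs) # cheap solve per right-hand side
+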
987
+ With default options, the solver used to perform the factorization depends
988
+ on third-party software availability and the conditioning of the problem.
989
+
990
+ For dense problems, solvers are tried in the following order:
991
+
992
+ 1. ``scipy.linalg.cho_factor``
993
+
994
+ 2. ``scipy.linalg.solve`` with option ``sym_pos=True``
995
+
996
+ 3. ``scipy.linalg.solve`` with option ``sym_pos=False``
997
+
998
+ 4. ``scipy.linalg.lstsq``
999
+
1000
+ For sparse problems:
1001
+
1002
+ 1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are
1003
+ installed)
1004
+
1005
+ 2. ``scipy.sparse.linalg.factorized`` (if scikit-umfpack and SuiteSparse
1006
+ are installed)
1007
+
1008
+ 3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
1009
+
1010
+ 4. ``scipy.sparse.linalg.lsqr``
1011
+
1012
+ If the solver fails for any reason, successively more robust (but slower)
1013
+ solvers are attempted in the order indicated. Attempting, failing, and
1014
+ re-starting factorization can be time consuming, so if the problem is
1015
+ numerically challenging, options can be set to bypass solvers that are
1016
+ failing. Setting ``cholesky=False`` skips to solver 2,
1017
+ ``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
1018
+ to solver 4 for both sparse and dense problems.
1019
+
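+ A hypothetical sketch of this retry pattern (not verbatim from SciPy;
+ ``_get_solver`` is the private helper shown later in this diff, which
+ returns ``None`` when factorization fails)::
+
+ for opts in (dict(cholesky=True), # solver 1
+ dict(cholesky=False), # solver 2
+ dict(cholesky=False, sym_pos=False), # solver 3
+ dict(cholesky=False, sym_pos=False, lstsq=True)): # solver 4
+ solve = _get_solver(M, sparse=sparse, **opts)
+ if solve is not None: # factorization succeeded; stop falling back
+ break
+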
1020
+ Potential improvements for combatting issues associated with dense
1021
+ columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
1022
+ [10]_ Section 4.1-4.2; the latter also discusses the alleviation of
1023
+ accuracy issues associated with the substitution approach to free
1024
+ variables.
1025
+
1026
+ After calculating the search direction, the maximum possible step size
1027
+ that does not activate the non-negativity constraints is calculated, and
1028
+ the smaller of this step size and unity is applied (as in [4]_ Section
1029
+ 4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
1030
+
1031
+ The new point is tested according to the termination conditions of [4]_
1032
+ Section 4.5. The same tolerance, which can be set using the ``tol`` option,
1033
+ is used for all checks. (A potential improvement would be to expose
1034
+ the different tolerances to be set independently.) If optimality,
1035
+ unboundedness, or infeasibility is detected, the solve procedure
1036
+ terminates; otherwise it repeats.
1037
+
1038
+ Whereas the top-level ``linprog`` module expects a problem of the form:
1039
+
1040
+ Minimize::
1041
+
1042
+ c @ x
1043
+
1044
+ Subject to::
1045
+
1046
+ A_ub @ x <= b_ub
1047
+ A_eq @ x == b_eq
1048
+ lb <= x <= ub
1049
+
1050
+ where ``lb = 0`` and ``ub = None`` unless set in ``bounds``. The problem
1051
+ is automatically converted to the form:
1052
+
1053
+ Minimize::
1054
+
1055
+ c @ x
1056
+
1057
+ Subject to::
1058
+
1059
+ A @ x == b
1060
+ x >= 0
1061
+
1062
+ for solution. That is, the original problem contains equality, upper-bound
1063
+ and variable constraints, whereas the method-specific solver requires
1064
+ equality constraints and variable non-negativity. ``linprog`` converts the
1065
+ original problem to standard form by converting the simple bounds to upper
1066
+ bound constraints, introducing non-negative slack variables for inequality
1067
+ constraints, and expressing unbounded variables as the difference between
1068
+ two non-negative variables. The problem is converted back to the original
1069
+ form before results are reported.
1070
+
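+ For example (illustrative), a single block of inequalities gains a
+ non-negative slack vector ``s``, and a free variable is split into a
+ difference of non-negative parts::
+
+ A_ub @ x <= b_ub -> [A_ub, I] @ [x; s] == b_ub, s >= 0
+ x free -> x = x_pos - x_neg, x_pos >= 0, x_neg >= 0
+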
1071
+ References
1072
+ ----------
1073
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
1074
+ optimizer for linear programming: an implementation of the
1075
+ homogeneous algorithm." High performance optimization. Springer US,
1076
+ 2000. 197-232.
1077
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
1078
+ Programming based on Newton's Method." Unpublished Course Notes,
1079
+ March 2004. Available 2/25/2017 at
1080
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
1081
+ .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
1082
+ programming." Mathematical Programming 71.2 (1995): 221-245.
1083
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1084
+ programming." Athena Scientific 1 (1997): 997.
1085
+ .. [10] Andersen, Erling D., et al. Implementation of interior point
1086
+ methods for large scale linear programming. HEC/Universite de
1087
+ Geneve, 1996.
1088
+ """
1089
+ pass
1090
+
1091
+
1092
+ def _linprog_rs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
1093
+ bounds=None, method='interior-point', callback=None,
1094
+ x0=None, maxiter=5000, disp=False, presolve=True,
1095
+ tol=1e-12, autoscale=False, rr=True, maxupdate=10,
1096
+ mast=False, pivot="mrc", **unknown_options):
1097
+ r"""
1098
+ Linear programming: minimize a linear objective function subject to linear
1099
+ equality and inequality constraints using the revised simplex method.
1100
+
1101
+ .. deprecated:: 1.9.0
1102
+ `method='revised simplex'` will be removed in SciPy 1.11.0.
1103
+ It is replaced by `method='highs'` because the latter is
1104
+ faster and more robust.
1105
+
1106
+ Linear programming solves problems of the following form:
1107
+
1108
+ .. math::
1109
+
1110
+ \min_x \ & c^T x \\
1111
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
1112
+ & A_{eq} x = b_{eq},\\
1113
+ & l \leq x \leq u ,
1114
+
1115
+ where :math:`x` is a vector of decision variables; :math:`c`,
1116
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
1117
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
1118
+
1119
+ Alternatively, that's:
1120
+
1121
+ minimize::
1122
+
1123
+ c @ x
1124
+
1125
+ such that::
1126
+
1127
+ A_ub @ x <= b_ub
1128
+ A_eq @ x == b_eq
1129
+ lb <= x <= ub
1130
+
1131
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
1132
+ ``bounds``.
1133
+
1134
+ Parameters
1135
+ ----------
1136
+ c : 1-D array
1137
+ The coefficients of the linear objective function to be minimized.
1138
+ A_ub : 2-D array, optional
1139
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
1140
+ coefficients of a linear inequality constraint on ``x``.
1141
+ b_ub : 1-D array, optional
1142
+ The inequality constraint vector. Each element represents an
1143
+ upper bound on the corresponding value of ``A_ub @ x``.
1144
+ A_eq : 2-D array, optional
1145
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
1146
+ coefficients of a linear equality constraint on ``x``.
1147
+ b_eq : 1-D array, optional
1148
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
1149
+ the corresponding element of ``b_eq``.
1150
+ bounds : sequence, optional
1151
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
1152
+ the minimum and maximum values of that decision variable. Use ``None``
1153
+ to indicate that there is no bound. By default, bounds are
1154
+ ``(0, None)`` (all decision variables are non-negative).
1155
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
1156
+ ``max`` will serve as bounds for all decision variables.
1157
+ method : str
1158
+ This is the method-specific documentation for 'revised simplex'.
1159
+ :ref:`'highs' <optimize.linprog-highs>`,
1160
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
1161
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
1162
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
1163
+ and :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
1164
+ are also available.
1165
+ callback : callable, optional
1166
+ Callback function to be executed once per iteration.
1167
+ x0 : 1-D array, optional
1168
+ Guess values of the decision variables, which will be refined by
1169
+ the optimization algorithm. This argument is currently used only by the
1170
+ 'revised simplex' method, and can only be used if `x0` represents a
1171
+ basic feasible solution.
1172
+
1173
+ Options
1174
+ -------
1175
+ maxiter : int (default: 5000)
1176
+ The maximum number of iterations to perform in either phase.
1177
+ disp : bool (default: False)
1178
+ Set to ``True`` if indicators of optimization status are to be printed
1179
+ to the console each iteration.
1180
+ presolve : bool (default: True)
1181
+ Presolve attempts to identify trivial infeasibilities,
1182
+ identify trivial unboundedness, and simplify the problem before
1183
+ sending it to the main solver. It is generally recommended
1184
+ to keep the default setting ``True``; set to ``False`` if
1185
+ presolve is to be disabled.
1186
+ tol : float (default: 1e-12)
1187
+ The tolerance which determines when a solution is "close enough" to
1188
+ zero in Phase 1 to be considered a basic feasible solution or close
1189
+ enough to positive to serve as an optimal solution.
1190
+ autoscale : bool (default: False)
1191
+ Set to ``True`` to automatically perform equilibration.
1192
+ Consider using this option if the numerical values in the
1193
+ constraints are separated by several orders of magnitude.
1194
+ rr : bool (default: True)
1195
+ Set to ``False`` to disable automatic redundancy removal.
1196
+ maxupdate : int (default: 10)
1197
+ The maximum number of updates performed on the LU factorization.
1198
+ After this many updates is reached, the basis matrix is factorized
1199
+ from scratch.
1200
+ mast : bool (default: False)
1201
+ Minimize Amortized Solve Time. If enabled, the average time to solve
1202
+ a linear system using the basis factorization is measured. Typically,
1203
+ the average solve time will decrease with each successive solve after
1204
+ initial factorization, as factorization takes much more time than the
1205
+ solve operation (and updates). Eventually, however, the updated
1206
+ factorization becomes sufficiently complex that the average solve time
1207
+ begins to increase. When this is detected, the basis is refactorized
1208
+ from scratch. Enable this option to maximize speed at the risk of
1209
+ nondeterministic behavior. Ignored if ``maxupdate`` is 0.
1210
+ pivot : "mrc" or "bland" (default: "mrc")
1211
+ Pivot rule: Minimum Reduced Cost ("mrc") or Bland's rule ("bland").
1212
+ Choose Bland's rule if iteration limit is reached and cycling is
1213
+ suspected.
1214
+ unknown_options : dict
1215
+ Optional arguments not used by this particular solver. If
1216
+ `unknown_options` is non-empty a warning is issued listing all
1217
+ unused options.
1218
+
1219
+ Returns
1220
+ -------
1221
+ res : OptimizeResult
1222
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
1223
+
1224
+ x : 1-D array
1225
+ The values of the decision variables that minimize the
1226
+ objective function while satisfying the constraints.
1227
+ fun : float
1228
+ The optimal value of the objective function ``c @ x``.
1229
+ slack : 1-D array
1230
+ The (nominally positive) values of the slack variables,
1231
+ ``b_ub - A_ub @ x``.
1232
+ con : 1-D array
1233
+ The (nominally zero) residuals of the equality constraints,
1234
+ ``b_eq - A_eq @ x``.
1235
+ success : bool
1236
+ ``True`` when the algorithm succeeds in finding an optimal
1237
+ solution.
1238
+ status : int
1239
+ An integer representing the exit status of the algorithm.
1240
+
1241
+ ``0`` : Optimization terminated successfully.
1242
+
1243
+ ``1`` : Iteration limit reached.
1244
+
1245
+ ``2`` : Problem appears to be infeasible.
1246
+
1247
+ ``3`` : Problem appears to be unbounded.
1248
+
1249
+ ``4`` : Numerical difficulties encountered.
1250
+
1251
+ ``5`` : Problem has no constraints; turn presolve on.
1252
+
1253
+ ``6`` : Invalid guess provided.
1254
+
1255
+ message : str
1256
+ A string descriptor of the exit status of the algorithm.
1257
+ nit : int
1258
+ The total number of iterations performed in all phases.
1259
+
1260
+
1261
+ Notes
1262
+ -----
1263
+ Method *revised simplex* uses the revised simplex method as described in
1264
+ [9]_, except that a factorization [11]_ of the basis matrix, rather than
1265
+ its inverse, is efficiently maintained and used to solve the linear systems
1266
+ at each iteration of the algorithm.
1267
+
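+ For example, an illustrative sketch of reusing factors with
+ ``scipy.linalg`` (``B`` and ``b`` denote the basis matrix and a
+ right-hand side; the actual method also updates the factorization
+ rather than refactorizing for every basis change)::
+
+ from scipy.linalg import lu_factor, lu_solve
+ lu_piv = lu_factor(B) # factorize the basis matrix once
+ x_B = lu_solve(lu_piv, b) # reuse the factors for each solve
+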
1268
+ References
1269
+ ----------
1270
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1271
+ programming." Athena Scientific 1 (1997): 997.
1272
+ .. [11] Bartels, Richard H. "A stabilization of the simplex method."
1273
+ Numerische Mathematik 16.5 (1971): 414-434.
1274
+ """
1275
+ pass
1276
+
1277
+
1278
+ def _linprog_simplex_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
1279
+ bounds=None, method='interior-point', callback=None,
1280
+ maxiter=5000, disp=False, presolve=True,
1281
+ tol=1e-12, autoscale=False, rr=True, bland=False,
1282
+ **unknown_options):
1283
+ r"""
1284
+ Linear programming: minimize a linear objective function subject to linear
1285
+ equality and inequality constraints using the tableau-based simplex method.
1286
+
1287
+ .. deprecated:: 1.9.0
1288
+ `method='simplex'` will be removed in SciPy 1.11.0.
1289
+ It is replaced by `method='highs'` because the latter is
1290
+ faster and more robust.
1291
+
1292
+ Linear programming solves problems of the following form:
1293
+
1294
+ .. math::
1295
+
1296
+ \min_x \ & c^T x \\
1297
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
1298
+ & A_{eq} x = b_{eq},\\
1299
+ & l \leq x \leq u ,
1300
+
1301
+ where :math:`x` is a vector of decision variables; :math:`c`,
1302
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
1303
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
1304
+
1305
+ Alternatively, that's:
1306
+
1307
+ minimize::
1308
+
1309
+ c @ x
1310
+
1311
+ such that::
1312
+
1313
+ A_ub @ x <= b_ub
1314
+ A_eq @ x == b_eq
1315
+ lb <= x <= ub
1316
+
1317
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
1318
+ ``bounds``.
1319
+
1320
+ Parameters
1321
+ ----------
1322
+ c : 1-D array
1323
+ The coefficients of the linear objective function to be minimized.
1324
+ A_ub : 2-D array, optional
1325
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
1326
+ coefficients of a linear inequality constraint on ``x``.
1327
+ b_ub : 1-D array, optional
1328
+ The inequality constraint vector. Each element represents an
1329
+ upper bound on the corresponding value of ``A_ub @ x``.
1330
+ A_eq : 2-D array, optional
1331
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
1332
+ coefficients of a linear equality constraint on ``x``.
1333
+ b_eq : 1-D array, optional
1334
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
1335
+ the corresponding element of ``b_eq``.
1336
+ bounds : sequence, optional
1337
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
1338
+ the minimum and maximum values of that decision variable. Use ``None``
1339
+ to indicate that there is no bound. By default, bounds are
1340
+ ``(0, None)`` (all decision variables are non-negative).
1341
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
1342
+ ``max`` will serve as bounds for all decision variables.
1343
+ method : str
1344
+ This is the method-specific documentation for 'simplex'.
1345
+ :ref:`'highs' <optimize.linprog-highs>`,
1346
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
1347
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
1348
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
1349
+ and :ref:`'revised simplex' <optimize.linprog-revised_simplex>`
1350
+ are also available.
1351
+ callback : callable, optional
1352
+ Callback function to be executed once per iteration.
1353
+
1354
+ Options
1355
+ -------
1356
+ maxiter : int (default: 5000)
1357
+ The maximum number of iterations to perform in either phase.
1358
+ disp : bool (default: False)
1359
+ Set to ``True`` if indicators of optimization status are to be printed
1360
+ to the console each iteration.
1361
+ presolve : bool (default: True)
1362
+ Presolve attempts to identify trivial infeasibilities,
1363
+ identify trivial unboundedness, and simplify the problem before
1364
+ sending it to the main solver. It is generally recommended
1365
+ to keep the default setting ``True``; set to ``False`` if
1366
+ presolve is to be disabled.
1367
+ tol : float (default: 1e-12)
1368
+ The tolerance which determines when a solution is "close enough" to
1369
+ zero in Phase 1 to be considered a basic feasible solution or close
1370
+ enough to positive to serve as an optimal solution.
1371
+ autoscale : bool (default: False)
1372
+ Set to ``True`` to automatically perform equilibration.
1373
+ Consider using this option if the numerical values in the
1374
+ constraints are separated by several orders of magnitude.
1375
+ rr : bool (default: True)
1376
+ Set to ``False`` to disable automatic redundancy removal.
1377
+ bland : bool
1378
+ If True, use Bland's anti-cycling rule [3]_ to choose pivots to
1379
+ prevent cycling. If False, choose pivots which should lead to a
1380
+ converged solution more quickly. The latter method is subject to
1381
+ cycling (non-convergence) in rare instances.
1382
+ unknown_options : dict
1383
+ Optional arguments not used by this particular solver. If
1384
+ `unknown_options` is non-empty a warning is issued listing all
1385
+ unused options.
1386
+
1387
+ Returns
1388
+ -------
1389
+ res : OptimizeResult
1390
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
1391
+
1392
+ x : 1-D array
1393
+ The values of the decision variables that minimize the
1394
+ objective function while satisfying the constraints.
1395
+ fun : float
1396
+ The optimal value of the objective function ``c @ x``.
1397
+ slack : 1-D array
1398
+ The (nominally positive) values of the slack variables,
1399
+ ``b_ub - A_ub @ x``.
1400
+ con : 1-D array
1401
+ The (nominally zero) residuals of the equality constraints,
1402
+ ``b_eq - A_eq @ x``.
1403
+ success : bool
1404
+ ``True`` when the algorithm succeeds in finding an optimal
1405
+ solution.
1406
+ status : int
1407
+ An integer representing the exit status of the algorithm.
1408
+
1409
+ ``0`` : Optimization terminated successfully.
1410
+
1411
+ ``1`` : Iteration limit reached.
1412
+
1413
+ ``2`` : Problem appears to be infeasible.
1414
+
1415
+ ``3`` : Problem appears to be unbounded.
1416
+
1417
+ ``4`` : Numerical difficulties encountered.
1418
+
1419
+ message : str
1420
+ A string descriptor of the exit status of the algorithm.
1421
+ nit : int
1422
+ The total number of iterations performed in all phases.
1423
+
1424
+ References
1425
+ ----------
1426
+ .. [1] Dantzig, George B., Linear programming and extensions. Rand
1427
+ Corporation Research Study Princeton Univ. Press, Princeton, NJ,
1428
+ 1963
1429
+ .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
1430
+ Mathematical Programming", McGraw-Hill, Chapter 4.
1431
+ .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
1432
+ Mathematics of Operations Research (2), 1977: pp. 103-107.
1433
+ """
1434
+ pass
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py ADDED
@@ -0,0 +1,440 @@
1
+ """HiGHS Linear Optimization Methods
2
+
3
+ Interface to HiGHS linear optimization software.
4
+ https://highs.dev/
5
+
6
+ .. versionadded:: 1.5.0
7
+
8
+ References
9
+ ----------
10
+ .. [1] Q. Huangfu and J.A.J. Hall. "Parallelizing the dual revised simplex
11
+ method." Mathematical Programming Computation, 10 (1), 119-142,
12
+ 2018. DOI: 10.1007/s12532-017-0130-5
13
+
14
+ """
15
+
16
+ import inspect
17
+ import numpy as np
18
+ from ._optimize import OptimizeWarning, OptimizeResult
19
+ from warnings import warn
20
+ from ._highs._highs_wrapper import _highs_wrapper
21
+ from ._highs._highs_constants import (
22
+ CONST_INF,
23
+ MESSAGE_LEVEL_NONE,
24
+ HIGHS_OBJECTIVE_SENSE_MINIMIZE,
25
+
26
+ MODEL_STATUS_NOTSET,
27
+ MODEL_STATUS_LOAD_ERROR,
28
+ MODEL_STATUS_MODEL_ERROR,
29
+ MODEL_STATUS_PRESOLVE_ERROR,
30
+ MODEL_STATUS_SOLVE_ERROR,
31
+ MODEL_STATUS_POSTSOLVE_ERROR,
32
+ MODEL_STATUS_MODEL_EMPTY,
33
+ MODEL_STATUS_OPTIMAL,
34
+ MODEL_STATUS_INFEASIBLE,
35
+ MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE,
36
+ MODEL_STATUS_UNBOUNDED,
37
+ MODEL_STATUS_REACHED_DUAL_OBJECTIVE_VALUE_UPPER_BOUND
38
+ as MODEL_STATUS_RDOVUB,
39
+ MODEL_STATUS_REACHED_OBJECTIVE_TARGET,
40
+ MODEL_STATUS_REACHED_TIME_LIMIT,
41
+ MODEL_STATUS_REACHED_ITERATION_LIMIT,
42
+
43
+ HIGHS_SIMPLEX_STRATEGY_DUAL,
44
+
45
+ HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
46
+
47
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
48
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
49
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
50
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
51
+ )
52
+ from scipy.sparse import csc_matrix, vstack, issparse
53
+
54
+
55
+ def _highs_to_scipy_status_message(highs_status, highs_message):
56
+ """Converts HiGHS status number/message to SciPy status number/message"""
57
+
58
+ scipy_statuses_messages = {
59
+ None: (4, "HiGHS did not provide a status code. "),
60
+ MODEL_STATUS_NOTSET: (4, ""),
61
+ MODEL_STATUS_LOAD_ERROR: (4, ""),
62
+ MODEL_STATUS_MODEL_ERROR: (2, ""),
63
+ MODEL_STATUS_PRESOLVE_ERROR: (4, ""),
64
+ MODEL_STATUS_SOLVE_ERROR: (4, ""),
65
+ MODEL_STATUS_POSTSOLVE_ERROR: (4, ""),
66
+ MODEL_STATUS_MODEL_EMPTY: (4, ""),
67
+ MODEL_STATUS_RDOVUB: (4, ""),
68
+ MODEL_STATUS_REACHED_OBJECTIVE_TARGET: (4, ""),
69
+ MODEL_STATUS_OPTIMAL: (0, "Optimization terminated successfully. "),
70
+ MODEL_STATUS_REACHED_TIME_LIMIT: (1, "Time limit reached. "),
71
+ MODEL_STATUS_REACHED_ITERATION_LIMIT: (1, "Iteration limit reached. "),
72
+ MODEL_STATUS_INFEASIBLE: (2, "The problem is infeasible. "),
73
+ MODEL_STATUS_UNBOUNDED: (3, "The problem is unbounded. "),
74
+ MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE: (4, "The problem is unbounded "
75
+ "or infeasible. ")}
76
+ unrecognized = (4, "The HiGHS status code was not recognized. ")
77
+ scipy_status, scipy_message = (
78
+ scipy_statuses_messages.get(highs_status, unrecognized))
79
+ scipy_message = (f"{scipy_message}"
80
+ f"(HiGHS Status {highs_status}: {highs_message})")
81
+ return scipy_status, scipy_message
82
+
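+ # Illustrative example: an absent status maps to SciPy status 4, e.g.
+ # _highs_to_scipy_status_message(None, "no message") returns
+ # (4, 'HiGHS did not provide a status code. (HiGHS Status None: no message)')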
83
+
84
+ def _replace_inf(x):
85
+ # Replace `np.inf` with CONST_INF
86
+ infs = np.isinf(x)
87
+ with np.errstate(invalid="ignore"):
88
+ x[infs] = np.sign(x[infs])*CONST_INF
89
+ return x
90
+
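+ # Illustrative example: _replace_inf(np.array([1., np.inf, -np.inf]))
+ # returns [1., CONST_INF, -CONST_INF] (modifying its argument in place),
+ # since HiGHS expects its large finite constant rather than np.inf.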
91
+
92
+ def _convert_to_highs_enum(option, option_str, choices):
93
+ # If option is in the choices we can look it up, if not use
94
+ # the default value taken from function signature and warn:
95
+ try:
96
+ return choices[option.lower()]
97
+ except AttributeError:
98
+ return choices[option]
99
+ except KeyError:
100
+ sig = inspect.signature(_linprog_highs)
101
+ default_str = sig.parameters[option_str].default
102
+ warn(f"Option {option_str} is {option}, but only values in "
103
+ f"{set(choices.keys())} are allowed. Using default: "
104
+ f"{default_str}.",
105
+ OptimizeWarning, stacklevel=3)
106
+ return choices[default_str]
107
+
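+ # Illustrative behavior: _convert_to_highs_enum('Devex', ..., choices)
+ # lower-cases the key and returns choices['devex']; a non-string option
+ # such as None falls through the AttributeError branch to choices[option];
+ # an unknown key warns and falls back to the default taken from the
+ # signature of _linprog_highs.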
108
+
109
+ def _linprog_highs(lp, solver, time_limit=None, presolve=True,
110
+ disp=False, maxiter=None,
111
+ dual_feasibility_tolerance=None,
112
+ primal_feasibility_tolerance=None,
113
+ ipm_optimality_tolerance=None,
114
+ simplex_dual_edge_weight_strategy=None,
115
+ mip_rel_gap=None,
116
+ mip_max_nodes=None,
117
+ **unknown_options):
118
+ r"""
119
+ Solve the following linear programming problem using one of the HiGHS
120
+ solvers:
121
+
122
+ User-facing documentation is in _linprog_doc.py.
123
+
124
+ Parameters
125
+ ----------
126
+ lp : _LPProblem
127
+ A ``scipy.optimize._linprog_util._LPProblem`` ``namedtuple``.
128
+ solver : "ipm" or "simplex" or None
129
+ Which HiGHS solver to use. If ``None``, "simplex" will be used.
130
+
131
+ Options
132
+ -------
133
+ maxiter : int
134
+ The maximum number of iterations to perform in either phase. For
135
+ ``solver='ipm'``, this does not include the number of crossover
136
+ iterations. Default is the largest possible value for an ``int``
137
+ on the platform.
138
+ disp : bool
139
+ Set to ``True`` if indicators of optimization status are to be printed
140
+ to the console each iteration; default ``False``.
141
+ time_limit : float
142
+ The maximum time in seconds allotted to solve the problem; default is
143
+ the largest possible value for a ``double`` on the platform.
144
+ presolve : bool
145
+ Presolve attempts to identify trivial infeasibilities,
146
+ identify trivial unboundedness, and simplify the problem before
147
+ sending it to the main solver. It is generally recommended
148
+ to keep the default setting ``True``; set to ``False`` if presolve is
149
+ to be disabled.
150
+ dual_feasibility_tolerance : double
151
+ Dual feasibility tolerance. Default is 1e-07.
152
+ The minimum of this and ``primal_feasibility_tolerance``
153
+ is used for the feasibility tolerance when ``solver='ipm'``.
154
+ primal_feasibility_tolerance : double
155
+ Primal feasibility tolerance. Default is 1e-07.
156
+ The minimum of this and ``dual_feasibility_tolerance``
157
+ is used for the feasibility tolerance when ``solver='ipm'``.
158
+ ipm_optimality_tolerance : double
159
+ Optimality tolerance for ``solver='ipm'``. Default is 1e-08.
160
+ Minimum possible value is 1e-12 and must be smaller than the largest
161
+ possible value for a ``double`` on the platform.
162
+ simplex_dual_edge_weight_strategy : str (default: None)
163
+ Strategy for simplex dual edge weights. The default, ``None``,
164
+ automatically selects one of the following.
165
+
166
+ ``'dantzig'`` uses Dantzig's original strategy of choosing the most
167
+ negative reduced cost.
168
+
169
+ ``'devex'`` uses the strategy described in [15]_.
170
+
171
+ ``steepest`` uses the exact steepest edge strategy as described in
172
+ [16]_.
173
+
174
+ ``'steepest-devex'`` begins with the exact steepest edge strategy
175
+ until the computation is too costly or inexact and then switches to
176
+ the devex method.
177
+
178
+ Currently, using ``None`` always selects ``'steepest-devex'``, but this
179
+ may change as new options become available.
180
+
181
+ mip_max_nodes : int
182
+ The maximum number of nodes allotted to solve the problem; default is
183
+ the largest possible value for a ``HighsInt`` on the platform.
184
+ Ignored if not using the MIP solver.
185
+ unknown_options : dict
186
+ Optional arguments not used by this particular solver. If
187
+ ``unknown_options`` is non-empty, a warning is issued listing all
188
+ unused options.
189
+
190
+ Returns
191
+ -------
192
+ sol : dict
193
+ A dictionary consisting of the fields:
194
+
195
+ x : 1D array
196
+ The values of the decision variables that minimizes the
197
+ objective function while satisfying the constraints.
198
+ fun : float
199
+ The optimal value of the objective function ``c @ x``.
200
+ slack : 1D array
201
+ The (nominally positive) values of the slack,
202
+ ``b_ub - A_ub @ x``.
203
+ con : 1D array
204
+ The (nominally zero) residuals of the equality constraints,
205
+ ``b_eq - A_eq @ x``.
206
+ success : bool
207
+ ``True`` when the algorithm succeeds in finding an optimal
208
+ solution.
209
+ status : int
210
+ An integer representing the exit status of the algorithm.
211
+
212
+ ``0`` : Optimization terminated successfully.
213
+
214
+ ``1`` : Iteration or time limit reached.
215
+
216
+ ``2`` : Problem appears to be infeasible.
217
+
218
+ ``3`` : Problem appears to be unbounded.
219
+
220
+ ``4`` : The HiGHS solver ran into a problem.
221
+
222
+ message : str
223
+ A string descriptor of the exit status of the algorithm.
224
+ nit : int
225
+ The total number of iterations performed.
226
+ For ``solver='simplex'``, this includes iterations in all
227
+ phases. For ``solver='ipm'``, this does not include
228
+ crossover iterations.
229
+ crossover_nit : int
230
+ The number of primal/dual pushes performed during the
231
+ crossover routine for ``solver='ipm'``. This is ``0``
232
+ for ``solver='simplex'``.
233
+ ineqlin : OptimizeResult
234
+ Solution and sensitivity information corresponding to the
235
+ inequality constraints, `b_ub`. A dictionary consisting of the
236
+ fields:
237
+
238
+ residual : np.ndnarray
239
+ The (nominally positive) values of the slack variables,
240
+ ``b_ub - A_ub @ x``. This quantity is also commonly
241
+ referred to as "slack".
242
+
243
+ marginals : np.ndarray
244
+ The sensitivity (partial derivative) of the objective
245
+ function with respect to the right-hand side of the
246
+ inequality constraints, `b_ub`.
247
+
248
+ eqlin : OptimizeResult
249
+ Solution and sensitivity information corresponding to the
250
+ equality constraints, `b_eq`. A dictionary consisting of the
251
+ fields:
252
+
253
+ residual : np.ndarray
254
+ The (nominally zero) residuals of the equality constraints,
255
+ ``b_eq - A_eq @ x``.
256
+
257
+ marginals : np.ndarray
258
+ The sensitivity (partial derivative) of the objective
259
+ function with respect to the right-hand side of the
260
+ equality constraints, `b_eq`.
261
+
262
+ lower, upper : OptimizeResult
263
+ Solution and sensitivity information corresponding to the
264
+ lower and upper bounds on decision variables, `bounds`.
265
+
266
+ residual : np.ndarray
267
+ The (nominally positive) values of the quantity
268
+ ``x - lb`` (lower) or ``ub - x`` (upper).
269
+
270
+ marginals : np.ndarray
271
+ The sensitivity (partial derivative) of the objective
272
+ function with respect to the lower and upper
273
+ `bounds`.
274
+
275
+ mip_node_count : int
276
+ The number of subproblems or "nodes" solved by the MILP
277
+ solver. Only present when `integrality` is not `None`.
278
+
279
+ mip_dual_bound : float
280
+ The MILP solver's final estimate of the lower bound on the
281
+ optimal solution. Only present when `integrality` is not
282
+ `None`.
283
+
284
+ mip_gap : float
285
+ The difference between the final objective function value
286
+ and the final dual bound, scaled by the final objective
287
+ function value. Only present when `integrality` is not
288
+ `None`.
289
+
290
+ Notes
291
+ -----
292
+ The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
293
+ `marginals`, or partial derivatives of the objective function with respect
294
+ to the right-hand side of each constraint. These partial derivatives are
295
+ also referred to as "Lagrange multipliers", "dual values", and
296
+ "shadow prices". The sign convention of `marginals` is opposite that
297
+ of Lagrange multipliers produced by many nonlinear solvers.
298
+
299
+ References
300
+ ----------
301
+ .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
302
+ Mathematical programming 5.1 (1973): 1-28.
303
+ .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
304
+ simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
305
+ """
306
+ if unknown_options:
307
+ message = (f"Unrecognized options detected: {unknown_options}. "
308
+ "These will be passed to HiGHS verbatim.")
309
+ warn(message, OptimizeWarning, stacklevel=3)
310
+
311
+ # Map options to HiGHS enum values
312
+ simplex_dual_edge_weight_strategy_enum = _convert_to_highs_enum(
313
+ simplex_dual_edge_weight_strategy,
314
+ 'simplex_dual_edge_weight_strategy',
315
+ choices={'dantzig': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
316
+ 'devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
317
+ 'steepest-devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
318
+ 'steepest':
319
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
320
+ None: None})
321
+
322
+ c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality = lp
323
+
324
+ lb, ub = bounds.T.copy() # separate bounds, copy->C-cntgs
325
+ # highs_wrapper solves LHS <= A*x <= RHS, not equality constraints
326
+ with np.errstate(invalid="ignore"):
327
+ lhs_ub = -np.ones_like(b_ub)*np.inf # LHS of UB constraints is -inf
328
+ rhs_ub = b_ub # RHS of UB constraints is b_ub
329
+ lhs_eq = b_eq # Equality constraint is inequality
330
+ rhs_eq = b_eq # constraint with LHS=RHS
331
+ lhs = np.concatenate((lhs_ub, lhs_eq))
332
+ rhs = np.concatenate((rhs_ub, rhs_eq))
333
+
334
+ if issparse(A_ub) or issparse(A_eq):
335
+ A = vstack((A_ub, A_eq))
336
+ else:
337
+ A = np.vstack((A_ub, A_eq))
338
+ A = csc_matrix(A)
339
+
340
+ options = {
341
+ 'presolve': presolve,
342
+ 'sense': HIGHS_OBJECTIVE_SENSE_MINIMIZE,
343
+ 'solver': solver,
344
+ 'time_limit': time_limit,
345
+ 'highs_debug_level': MESSAGE_LEVEL_NONE,
346
+ 'dual_feasibility_tolerance': dual_feasibility_tolerance,
347
+ 'ipm_optimality_tolerance': ipm_optimality_tolerance,
348
+ 'log_to_console': disp,
349
+ 'mip_max_nodes': mip_max_nodes,
350
+ 'output_flag': disp,
351
+ 'primal_feasibility_tolerance': primal_feasibility_tolerance,
352
+ 'simplex_dual_edge_weight_strategy':
353
+ simplex_dual_edge_weight_strategy_enum,
354
+ 'simplex_strategy': HIGHS_SIMPLEX_STRATEGY_DUAL,
355
+ 'simplex_crash_strategy': HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
356
+ 'ipm_iteration_limit': maxiter,
357
+ 'simplex_iteration_limit': maxiter,
358
+ 'mip_rel_gap': mip_rel_gap,
359
+ }
360
+ options.update(unknown_options)
361
+
362
+ # np.inf doesn't work; use very large constant
363
+ rhs = _replace_inf(rhs)
364
+ lhs = _replace_inf(lhs)
365
+ lb = _replace_inf(lb)
366
+ ub = _replace_inf(ub)
367
+
368
+ if integrality is None or np.sum(integrality) == 0:
369
+ integrality = np.empty(0)
370
+ else:
371
+ integrality = np.array(integrality)
372
+
373
+ res = _highs_wrapper(c, A.indptr, A.indices, A.data, lhs, rhs,
374
+ lb, ub, integrality.astype(np.uint8), options)
375
+
376
+ # HiGHS represents constraints as lhs/rhs, so
377
+ # Ax + s = b => Ax = b - s
378
+ # and we need to split up s by A_ub and A_eq
379
+ if 'slack' in res:
380
+ slack = res['slack']
381
+ con = np.array(slack[len(b_ub):])
382
+ slack = np.array(slack[:len(b_ub)])
383
+ else:
384
+ slack, con = None, None
385
+
386
+ # lagrange multipliers for equalities/inequalities and upper/lower bounds
387
+ if 'lambda' in res:
388
+ lamda = res['lambda']
389
+ marg_ineqlin = np.array(lamda[:len(b_ub)])
390
+ marg_eqlin = np.array(lamda[len(b_ub):])
391
+ marg_upper = np.array(res['marg_bnds'][1, :])
392
+ marg_lower = np.array(res['marg_bnds'][0, :])
393
+ else:
394
+ marg_ineqlin, marg_eqlin = None, None
395
+ marg_upper, marg_lower = None, None
396
+
397
+ # this needs to be updated if we start choosing the solver intelligently
398
+
399
+ # Convert to scipy-style status and message
400
+ highs_status = res.get('status', None)
401
+ highs_message = res.get('message', None)
402
+ status, message = _highs_to_scipy_status_message(highs_status,
403
+ highs_message)
404
+
405
+ x = np.array(res['x']) if 'x' in res else None
406
+ sol = {'x': x,
407
+ 'slack': slack,
408
+ 'con': con,
409
+ 'ineqlin': OptimizeResult({
410
+ 'residual': slack,
411
+ 'marginals': marg_ineqlin,
412
+ }),
413
+ 'eqlin': OptimizeResult({
414
+ 'residual': con,
415
+ 'marginals': marg_eqlin,
416
+ }),
417
+ 'lower': OptimizeResult({
418
+ 'residual': None if x is None else x - lb,
419
+ 'marginals': marg_lower,
420
+ }),
421
+ 'upper': OptimizeResult({
422
+ 'residual': None if x is None else ub - x,
423
+ 'marginals': marg_upper
424
+ }),
425
+ 'fun': res.get('fun'),
426
+ 'status': status,
427
+ 'success': res['status'] == MODEL_STATUS_OPTIMAL,
428
+ 'message': message,
429
+ 'nit': res.get('simplex_nit', 0) or res.get('ipm_nit', 0),
430
+ 'crossover_nit': res.get('crossover_nit'),
431
+ }
432
+
433
+ if np.any(x) and integrality is not None:
434
+ sol.update({
435
+ 'mip_node_count': res.get('mip_node_count', 0),
436
+ 'mip_dual_bound': res.get('mip_dual_bound', 0.0),
437
+ 'mip_gap': res.get('mip_gap', 0.0),
438
+ })
439
+
440
+ return sol
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py ADDED
@@ -0,0 +1,1126 @@
1
+ """Interior-point method for linear programming
2
+
3
+ The *interior-point* method uses the primal-dual path following algorithm
4
+ outlined in [1]_. This algorithm supports sparse constraint matrices and
5
+ is typically faster than the simplex methods, especially for large, sparse
6
+ problems. Note, however, that the solution returned may be slightly less
7
+ accurate than those of the simplex methods and will not, in general,
8
+ correspond with a vertex of the polytope defined by the constraints.
9
+
10
+ .. versionadded:: 1.0.0
11
+
12
+ References
13
+ ----------
14
+ .. [1] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
15
+ optimizer for linear programming: an implementation of the
16
+ homogeneous algorithm." High performance optimization. Springer US,
17
+ 2000. 197-232.
18
+ """
19
+ # Author: Matt Haberland
20
+
21
+ import numpy as np
22
+ import scipy as sp
23
+ import scipy.sparse as sps
24
+ from warnings import warn
25
+ from scipy.linalg import LinAlgError
26
+ from ._optimize import OptimizeWarning, OptimizeResult, _check_unknown_options
27
+ from ._linprog_util import _postsolve
28
+ has_umfpack = True
29
+ has_cholmod = True
30
+ try:
31
+ import sksparse # noqa: F401
32
+ from sksparse.cholmod import cholesky as cholmod # noqa: F401
33
+ from sksparse.cholmod import analyze as cholmod_analyze
34
+ except ImportError:
35
+ has_cholmod = False
36
+ try:
37
+ import scikits.umfpack # test whether to use factorized # noqa: F401
38
+ except ImportError:
39
+ has_umfpack = False
40
+
41
+
42
+ def _get_solver(M, sparse=False, lstsq=False, sym_pos=True,
43
+ cholesky=True, permc_spec='MMD_AT_PLUS_A'):
44
+ """
45
+ Given solver options, return a handle to the appropriate linear system
46
+ solver.
47
+
48
+ Parameters
49
+ ----------
50
+ M : 2-D array
51
+ As defined in [4] Equation 8.31
52
+ sparse : bool (default = False)
53
+ True if the system to be solved is sparse. This is typically set
54
+ True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
55
+ lstsq : bool (default = False)
56
+ True if the system is ill-conditioned and/or (nearly) singular and
57
+ thus a more robust least-squares solver is desired. This is sometimes
58
+ needed as the solution is approached.
59
+ sym_pos : bool (default = True)
60
+ True if the system matrix is symmetric positive definite.
61
+ Sometimes this needs to be set false as the solution is approached,
62
+ even when the system should be symmetric positive definite, due to
63
+ numerical difficulties.
64
+ cholesky : bool (default = True)
65
+ True if the system is to be solved by Cholesky, rather than LU,
66
+ decomposition. This is typically faster unless the problem is very
67
+ small or prone to numerical difficulties.
68
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
69
+ Sparsity preservation strategy used by SuperLU. Acceptable values are:
70
+
71
+ - ``NATURAL``: natural ordering.
72
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
73
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
74
+ - ``COLAMD``: approximate minimum degree column ordering.
75
+
76
+ See SuperLU documentation.
77
+
78
+ Returns
79
+ -------
80
+ solve : function
81
+ Handle to the appropriate solver function
82
+
83
+ """
84
+ try:
85
+ if sparse:
86
+ if lstsq:
87
+ def solve(r, sym_pos=False):
88
+ return sps.linalg.lsqr(M, r)[0]
89
+ elif cholesky:
90
+ try:
91
+ # Will raise an exception in the first call,
92
+ # or when the matrix changes due to a new problem
93
+ _get_solver.cholmod_factor.cholesky_inplace(M)
94
+ except Exception:
95
+ _get_solver.cholmod_factor = cholmod_analyze(M)
96
+ _get_solver.cholmod_factor.cholesky_inplace(M)
97
+ solve = _get_solver.cholmod_factor
98
+ else:
99
+ if has_umfpack and sym_pos:
100
+ solve = sps.linalg.factorized(M)
101
+ else: # factorized doesn't pass permc_spec
102
+ solve = sps.linalg.splu(M, permc_spec=permc_spec).solve
103
+
104
+ else:
105
+ if lstsq: # sometimes necessary as solution is approached
106
+ def solve(r):
107
+ return sp.linalg.lstsq(M, r)[0]
108
+ elif cholesky:
109
+ L = sp.linalg.cho_factor(M)
110
+
111
+ def solve(r):
112
+ return sp.linalg.cho_solve(L, r)
113
+ else:
114
+ # this seems to cache the matrix factorization, so solving
115
+ # with multiple right hand sides is much faster
116
+ def solve(r, sym_pos=sym_pos):
117
+ if sym_pos:
118
+ return sp.linalg.solve(M, r, assume_a="pos")
119
+ else:
120
+ return sp.linalg.solve(M, r)
121
+ # There are many things that can go wrong here, and it's hard to say
122
+ # what all of them are. It doesn't really matter: if the matrix can't be
123
+ # factorized, return None. get_solver will be called again with different
124
+ # inputs, and a new routine will try to factorize the matrix.
125
+ except KeyboardInterrupt:
126
+ raise
127
+ except Exception:
128
+ return None
129
+ return solve
130
+
131
+
132
+ def _get_delta(A, b, c, x, y, z, tau, kappa, gamma, eta, sparse=False,
133
+ lstsq=False, sym_pos=True, cholesky=True, pc=True, ip=False,
134
+ permc_spec='MMD_AT_PLUS_A'):
135
+ """
136
+ Given standard form problem defined by ``A``, ``b``, and ``c``;
137
+ current variable estimates ``x``, ``y``, ``z``, ``tau``, and ``kappa``;
138
+ algorithmic parameters ``gamma`` and ``eta``;
139
+ and options ``sparse``, ``lstsq``, ``sym_pos``, ``cholesky``, ``pc``
140
+ (predictor-corrector), and ``ip`` (initial point improvement),
141
+ get the search direction for increments to the variable estimates.
142
+
143
+ Parameters
144
+ ----------
145
+ As defined in [4], except:
146
+ sparse : bool
147
+ True if the system to be solved is sparse. This is typically set
148
+ True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
149
+ lstsq : bool
150
+ True if the system is ill-conditioned and/or (nearly) singular and
151
+ thus a more robust least-squares solver is desired. This is sometimes
152
+ needed as the solution is approached.
153
+ sym_pos : bool
154
+ True if the system matrix is symmetric positive definite
155
+ Sometimes this needs to be set false as the solution is approached,
156
+ even when the system should be symmetric positive definite, due to
157
+ numerical difficulties.
158
+ cholesky : bool
159
+ True if the system is to be solved by Cholesky, rather than LU,
160
+ decomposition. This is typically faster unless the problem is very
161
+ small or prone to numerical difficulties.
162
+ pc : bool
163
+ True if the predictor-corrector method of Mehrotra is to be used. This
164
+ is almost always (if not always) beneficial. Even though it requires
165
+ the solution of an additional linear system, the factorization
166
+ is typically (implicitly) reused so solution is efficient, and the
167
+ number of algorithm iterations is typically reduced.
168
+ ip : bool
169
+ True if the improved initial point suggestion due to [4] section 4.3
170
+ is desired. It's unclear whether this is beneficial.
171
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
172
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
173
+ True``.) A matrix is factorized in each iteration of the algorithm.
174
+ This option specifies how to permute the columns of the matrix for
175
+ sparsity preservation. Acceptable values are:
176
+
177
+ - ``NATURAL``: natural ordering.
178
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
179
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
180
+ - ``COLAMD``: approximate minimum degree column ordering.
181
+
182
+ This option can impact the convergence of the
183
+ interior point algorithm; test different values to determine which
184
+ performs best for your problem. For more information, refer to
185
+ ``scipy.sparse.linalg.splu``.
186
+
187
+ Returns
188
+ -------
189
+ Search directions as defined in [4]
190
+
191
+ References
192
+ ----------
193
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
194
+ optimizer for linear programming: an implementation of the
195
+ homogeneous algorithm." High performance optimization. Springer US,
196
+ 2000. 197-232.
197
+
198
+ """
199
+ if A.shape[0] == 0:
200
+ # If there are no constraints, some solvers fail (understandably)
201
+ # rather than returning empty solution. This gets the job done.
202
+ sparse, lstsq, sym_pos, cholesky = False, False, True, False
203
+ n_x = len(x)
204
+
205
+ # [4] Equation 8.8
206
+ r_P = b * tau - A.dot(x)
207
+ r_D = c * tau - A.T.dot(y) - z
208
+ r_G = c.dot(x) - b.transpose().dot(y) + kappa
209
+ mu = (x.dot(z) + tau * kappa) / (n_x + 1)
210
+
211
+ # Assemble M from [4] Equation 8.31
212
+ Dinv = x / z
213
+
214
+ if sparse:
215
+ M = A.dot(sps.diags(Dinv, 0, format="csc").dot(A.T))
216
+ else:
217
+ M = A.dot(Dinv.reshape(-1, 1) * A.T)
218
+ solve = _get_solver(M, sparse, lstsq, sym_pos, cholesky, permc_spec)
219
+
220
+ # pc: "predictor-corrector" [4] Section 4.1
221
+ # In development this option could be turned off
222
+ # but it always seems to improve performance substantially
223
+ n_corrections = 1 if pc else 0
224
+
225
+ i = 0
226
+ alpha, d_x, d_z, d_tau, d_kappa = 0, 0, 0, 0, 0
227
+ while i <= n_corrections:
228
+ # Reference [4] Eq. 8.6
229
+ rhatp = eta(gamma) * r_P
230
+ rhatd = eta(gamma) * r_D
231
+ rhatg = eta(gamma) * r_G
232
+
233
+ # Reference [4] Eq. 8.7
234
+ rhatxs = gamma * mu - x * z
235
+ rhattk = gamma * mu - tau * kappa
236
+
237
+ if i == 1:
238
+ if ip: # if the correction is to get "initial point"
239
+ # Reference [4] Eq. 8.23
240
+ rhatxs = ((1 - alpha) * gamma * mu -
241
+ x * z - alpha**2 * d_x * d_z)
242
+ rhattk = ((1 - alpha) * gamma * mu -
243
+ tau * kappa -
244
+ alpha**2 * d_tau * d_kappa)
245
+ else: # if the correction is for "predictor-corrector"
246
+ # Reference [4] Eq. 8.13
247
+ rhatxs -= d_x * d_z
248
+ rhattk -= d_tau * d_kappa
249
+
250
+ # sometimes numerical difficulties arise as the solution is approached
251
+ # this loop tries to solve the equations using a sequence of functions
252
+ # for solve. For dense systems, the order is:
253
+ # 1. scipy.linalg.cho_factor/scipy.linalg.cho_solve,
254
+ # 2. scipy.linalg.solve w/ sym_pos = True,
255
+ # 3. scipy.linalg.solve w/ sym_pos = False, and if all else fails
256
+ # 4. scipy.linalg.lstsq
257
+ # For sparse systems, the order is:
258
+ # 1. sksparse.cholmod.cholesky (if available)
259
+ # 2. scipy.sparse.linalg.factorized (if umfpack available)
260
+ # 3. scipy.sparse.linalg.splu
261
+ # 4. scipy.sparse.linalg.lsqr
262
+ solved = False
263
+ while not solved:
264
+ try:
265
+ # [4] Equation 8.28
266
+ p, q = _sym_solve(Dinv, A, c, b, solve)
267
+ # [4] Equation 8.29
268
+ u, v = _sym_solve(Dinv, A, rhatd -
269
+ (1 / x) * rhatxs, rhatp, solve)
270
+ if np.any(np.isnan(p)) or np.any(np.isnan(q)):
271
+ raise LinAlgError
272
+ solved = True
273
+ except (LinAlgError, ValueError, TypeError) as e:
274
+ # Usually this doesn't happen. If it does, it happens when
275
+ # there are redundant constraints or when approaching the
276
+ # solution. If so, change solver.
277
+ if cholesky:
278
+ cholesky = False
279
+ warn(
280
+ "Solving system with option 'cholesky':True "
281
+ "failed. It is normal for this to happen "
282
+ "occasionally, especially as the solution is "
283
+ "approached. However, if you see this frequently, "
284
+ "consider setting option 'cholesky' to False.",
285
+ OptimizeWarning, stacklevel=5)
286
+ elif sym_pos:
287
+ sym_pos = False
288
+ warn(
289
+ "Solving system with option 'sym_pos':True "
290
+ "failed. It is normal for this to happen "
291
+ "occasionally, especially as the solution is "
292
+ "approached. However, if you see this frequently, "
293
+ "consider setting option 'sym_pos' to False.",
294
+ OptimizeWarning, stacklevel=5)
295
+ elif not lstsq:
296
+ lstsq = True
297
+ warn(
298
+ "Solving system with option 'sym_pos':False "
299
+ "failed. This may happen occasionally, "
300
+ "especially as the solution is "
301
+ "approached. However, if you see this frequently, "
302
+ "your problem may be numerically challenging. "
303
+ "If you cannot improve the formulation, consider "
304
+ "setting 'lstsq' to True. Consider also setting "
305
+ "`presolve` to True, if it is not already.",
306
+ OptimizeWarning, stacklevel=5)
307
+ else:
308
+ raise e
309
+ solve = _get_solver(M, sparse, lstsq, sym_pos,
310
+ cholesky, permc_spec)
311
+ # [4] Results after 8.29
312
+ d_tau = ((rhatg + 1 / tau * rhattk - (-c.dot(u) + b.dot(v))) /
313
+ (1 / tau * kappa + (-c.dot(p) + b.dot(q))))
314
+ d_x = u + p * d_tau
315
+ d_y = v + q * d_tau
316
+
317
+ # [4] Relations between after 8.25 and 8.26
318
+ d_z = (1 / x) * (rhatxs - z * d_x)
319
+ d_kappa = 1 / tau * (rhattk - kappa * d_tau)
320
+
321
+ # [4] 8.12 and "Let alpha be the maximal possible step..." before 8.23
322
+ alpha = _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, 1)
323
+ if ip: # initial point - see [4] 4.4
324
+ gamma = 10
325
+ else: # predictor-corrector, [4] definition after 8.12
326
+ beta1 = 0.1 # [4] pg. 220 (Table 8.1)
327
+ gamma = (1 - alpha)**2 * min(beta1, (1 - alpha))
328
+ i += 1
329
+
330
+ return d_x, d_y, d_z, d_tau, d_kappa
331
+
332
+
333
+ def _sym_solve(Dinv, A, r1, r2, solve):
334
+ """
335
+ An implementation of [4] equation 8.31 and 8.32
336
+
337
+ References
338
+ ----------
339
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
340
+ optimizer for linear programming: an implementation of the
341
+ homogeneous algorithm." High performance optimization. Springer US,
342
+ 2000. 197-232.
343
+
344
+ """
345
+ # [4] 8.31
346
+ r = r2 + A.dot(Dinv * r1)
347
+ v = solve(r)
348
+ # [4] 8.32
349
+ u = Dinv * (A.T.dot(v) - r1)
350
+ return u, v
351
+
352
+
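# --- Editor's sketch (not part of the SciPy source): a hedged numerical
# check that the reduction above solves the symmetric block system
#   [-inv(D)  A.T] [u]   [r1]        with D = diag(Dinv) = diag(x / z) and
#   [   A      0 ] [v] = [r2]        M = A @ D @ A.T  (Eqs. 8.31 / 8.32).
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 5))
Dinv = rng.uniform(0.5, 2.0, 5)
r1, r2 = rng.standard_normal(5), rng.standard_normal(3)

M = A @ (Dinv.reshape(-1, 1) * A.T)
v = np.linalg.solve(M, r2 + A @ (Dinv * r1))   # mirrors _sym_solve
u = Dinv * (A.T @ v - r1)

assert np.allclose(A @ u, r2)                  # second block row
assert np.allclose(A.T @ v - u / Dinv, r1)     # first block row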
353
+ def _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, alpha0):
354
+ """
355
+ An implementation of [4] equation 8.21
356
+
357
+ References
358
+ ----------
359
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
360
+ optimizer for linear programming: an implementation of the
361
+ homogeneous algorithm." High performance optimization. Springer US,
362
+ 2000. 197-232.
363
+
364
+ """
365
+ # [4] 4.3 Equation 8.21, ignoring 8.20 requirement
366
+ # same step is taken in primal and dual spaces
367
+ # alpha0 is basically beta3 from [4] Table 8.1, but instead of beta3
368
+ # the value 1 is used in Mehrota corrector and initial point correction
369
+ i_x = d_x < 0
370
+ i_z = d_z < 0
371
+ alpha_x = alpha0 * np.min(x[i_x] / -d_x[i_x]) if np.any(i_x) else 1
372
+ alpha_tau = alpha0 * tau / -d_tau if d_tau < 0 else 1
373
+ alpha_z = alpha0 * np.min(z[i_z] / -d_z[i_z]) if np.any(i_z) else 1
374
+ alpha_kappa = alpha0 * kappa / -d_kappa if d_kappa < 0 else 1
375
+ alpha = np.min([1, alpha_x, alpha_tau, alpha_z, alpha_kappa])
376
+ return alpha
377
+
378
+
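# --- Editor's sketch (not part of the SciPy source): the ratio test above
# in isolation. alpha is the largest fraction alpha0 of the step that keeps
# every updated variable nonnegative (here only x and tau shrink).
import numpy as np

x, d_x = np.array([1.0, 2.0]), np.array([-0.5, 1.0])
tau, d_tau = 1.0, -0.25
alpha0 = 0.99995

i_x = d_x < 0
alpha_x = alpha0 * np.min(x[i_x] / -d_x[i_x]) if np.any(i_x) else 1
alpha_tau = alpha0 * tau / -d_tau if d_tau < 0 else 1
alpha = min(1, alpha_x, alpha_tau)             # -> 1 for this data

assert np.all(x + alpha * d_x >= 0) and tau + alpha * d_tau >= 0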
379
+ def _get_message(status):
380
+ """
381
+ Given problem status code, return a more detailed message.
382
+
383
+ Parameters
384
+ ----------
385
+ status : int
386
+ An integer representing the exit status of the optimization::
387
+
388
+ 0 : Optimization terminated successfully
389
+ 1 : Iteration limit reached
390
+ 2 : Problem appears to be infeasible
391
+ 3 : Problem appears to be unbounded
392
+ 4 : Serious numerical difficulties encountered
393
+
394
+ Returns
395
+ -------
396
+ message : str
397
+ A string descriptor of the exit status of the optimization.
398
+
399
+ """
400
+ messages = (
401
+ ["Optimization terminated successfully.",
402
+ "The iteration limit was reached before the algorithm converged.",
403
+ "The algorithm terminated successfully and determined that the "
404
+ "problem is infeasible.",
405
+ "The algorithm terminated successfully and determined that the "
406
+ "problem is unbounded.",
407
+ "Numerical difficulties were encountered before the problem "
408
+ "converged. Please check your problem formulation for errors, "
409
+ "independence of linear equality constraints, and reasonable "
410
+ "scaling and matrix condition numbers. If you continue to "
411
+ "encounter this error, please submit a bug report."
412
+ ])
413
+ return messages[status]
414
+
415
+
416
+ def _do_step(x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha):
417
+ """
418
+ An implementation of [4] Equation 8.9
419
+
420
+ References
421
+ ----------
422
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
423
+ optimizer for linear programming: an implementation of the
424
+ homogeneous algorithm." High performance optimization. Springer US,
425
+ 2000. 197-232.
426
+
427
+ """
428
+ x = x + alpha * d_x
429
+ tau = tau + alpha * d_tau
430
+ z = z + alpha * d_z
431
+ kappa = kappa + alpha * d_kappa
432
+ y = y + alpha * d_y
433
+ return x, y, z, tau, kappa
434
+
435
+
436
+ def _get_blind_start(shape):
437
+ """
438
+ Return the starting point from [4] 4.4
439
+
440
+ References
441
+ ----------
442
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
443
+ optimizer for linear programming: an implementation of the
444
+ homogeneous algorithm." High performance optimization. Springer US,
445
+ 2000. 197-232.
446
+
447
+ """
448
+ m, n = shape
449
+ x0 = np.ones(n)
450
+ y0 = np.zeros(m)
451
+ z0 = np.ones(n)
452
+ tau0 = 1
453
+ kappa0 = 1
454
+ return x0, y0, z0, tau0, kappa0
455
+
456
+
457
+ def _indicators(A, b, c, c0, x, y, z, tau, kappa):
458
+ """
459
+ Implementation of several equations from [4] used as indicators of
460
+ the status of optimization.
461
+
462
+ References
463
+ ----------
464
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
465
+ optimizer for linear programming: an implementation of the
466
+ homogeneous algorithm." High performance optimization. Springer US,
467
+ 2000. 197-232.
468
+
469
+ """
470
+
471
+ # residuals for termination are relative to initial values
472
+ x0, y0, z0, tau0, kappa0 = _get_blind_start(A.shape)
473
+
474
+ # See [4], Section 4 - The Homogeneous Algorithm, Equation 8.8
475
+ def r_p(x, tau):
476
+ return b * tau - A.dot(x)
477
+
478
+ def r_d(y, z, tau):
479
+ return c * tau - A.T.dot(y) - z
480
+
481
+ def r_g(x, y, kappa):
482
+ return kappa + c.dot(x) - b.dot(y)
483
+
484
+ # np.dot unpacks if they are arrays of size one
485
+ def mu(x, tau, z, kappa):
486
+ return (x.dot(z) + np.dot(tau, kappa)) / (len(x) + 1)
487
+
488
+ obj = c.dot(x / tau) + c0
489
+
490
+ def norm(a):
491
+ return np.linalg.norm(a)
492
+
493
+ # See [4], Section 4.5 - The Stopping Criteria
494
+ r_p0 = r_p(x0, tau0)
495
+ r_d0 = r_d(y0, z0, tau0)
496
+ r_g0 = r_g(x0, y0, kappa0)
497
+ mu_0 = mu(x0, tau0, z0, kappa0)
498
+ rho_A = norm(c.T.dot(x) - b.T.dot(y)) / (tau + norm(b.T.dot(y)))
499
+ rho_p = norm(r_p(x, tau)) / max(1, norm(r_p0))
500
+ rho_d = norm(r_d(y, z, tau)) / max(1, norm(r_d0))
501
+ rho_g = norm(r_g(x, y, kappa)) / max(1, norm(r_g0))
502
+ rho_mu = mu(x, tau, z, kappa) / mu_0
503
+ return rho_p, rho_d, rho_A, rho_g, rho_mu, obj
504
+
505
+
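# --- Editor's sketch (not part of the SciPy source): the stopping
# indicators above, evaluated at the blind start for a tiny problem.
# Each rho is the current residual norm divided by max(1, its initial
# norm), so the indicators begin at O(1) and head toward tol.
import numpy as np

A, b, c = np.array([[1., 1.]]), np.array([1.]), np.array([1., 2.])
x0, y0, z0, tau0, kappa0 = np.ones(2), np.zeros(1), np.ones(2), 1.0, 1.0

r_p0 = b * tau0 - A @ x0
mu_0 = (x0 @ z0 + tau0 * kappa0) / (len(x0) + 1)
rho_p = np.linalg.norm(r_p0) / max(1, np.linalg.norm(r_p0))
print(rho_p, mu_0)   # 1.0 1.0 at this starting point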
506
+ def _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj, header=False):
507
+ """
508
+ Print indicators of optimization status to the console.
509
+
510
+ Parameters
511
+ ----------
512
+ rho_p : float
513
+ The (normalized) primal feasibility, see [4] 4.5
514
+ rho_d : float
515
+ The (normalized) dual feasibility, see [4] 4.5
516
+ rho_g : float
517
+ The (normalized) duality gap, see [4] 4.5
518
+ alpha : float
519
+ The step size, see [4] 4.3
520
+ rho_mu : float
521
+ The (normalized) path parameter, see [4] 4.5
522
+ obj : float
523
+ The objective function value of the current iterate
524
+ header : bool
525
+ True if a header is to be printed
526
+
527
+ References
528
+ ----------
529
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
530
+ optimizer for linear programming: an implementation of the
531
+ homogeneous algorithm." High performance optimization. Springer US,
532
+ 2000. 197-232.
533
+
534
+ """
535
+ if header:
536
+ print("Primal Feasibility ",
537
+ "Dual Feasibility ",
538
+ "Duality Gap ",
539
+ "Step ",
540
+ "Path Parameter ",
541
+ "Objective ")
542
+
543
+ # left-justify each value in a fixed-width column, 13 significant digits
544
+ fmt = '{0:<20.13}{1:<20.13}{2:<20.13}{3:<17.13}{4:<20.13}{5:<20.13}'
545
+ print(fmt.format(
546
+ float(rho_p),
547
+ float(rho_d),
548
+ float(rho_g),
549
+ alpha if isinstance(alpha, str) else float(alpha),
550
+ float(rho_mu),
551
+ float(obj)))
552
+
553
+
554
+ def _ip_hsd(A, b, c, c0, alpha0, beta, maxiter, disp, tol, sparse, lstsq,
555
+ sym_pos, cholesky, pc, ip, permc_spec, callback, postsolve_args):
556
+ r"""
557
+ Solve a linear programming problem in standard form:
558
+
559
+ Minimize::
560
+
561
+ c @ x
562
+
563
+ Subject to::
564
+
565
+ A @ x == b
566
+ x >= 0
567
+
568
+ using the interior point method of [4].
569
+
570
+ Parameters
571
+ ----------
572
+ A : 2-D array
573
+ 2-D array such that ``A @ x``, gives the values of the equality
574
+ constraints at ``x``.
575
+ b : 1-D array
576
+ 1-D array of values representing the RHS of each equality constraint
577
+ (row) in ``A`` (for standard form problem).
578
+ c : 1-D array
579
+ Coefficients of the linear objective function to be minimized (for
580
+ standard form problem).
581
+ c0 : float
582
+ Constant term in objective function due to fixed (and eliminated)
583
+ variables. (Purely for display.)
584
+ alpha0 : float
585
+ The maximal step size for Mehrotra's predictor-corrector search
586
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
587
+ beta : float
588
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
589
+ maxiter : int
590
+ The maximum number of iterations of the algorithm.
591
+ disp : bool
592
+ Set to ``True`` if indicators of optimization status are to be printed
593
+ to the console each iteration.
594
+ tol : float
595
+ Termination tolerance; see [4]_ Section 4.5.
596
+ sparse : bool
597
+ Set to ``True`` if the problem is to be treated as sparse. However,
598
+ the inputs ``A_eq`` and ``A_ub`` should nonetheless be provided as
599
+ (dense) arrays rather than sparse matrices.
600
+ lstsq : bool
601
+ Set to ``True`` if the problem is expected to be very poorly
602
+ conditioned. This should always be left as ``False`` unless severe
603
+ numerical difficulties are frequently encountered, and a better option
604
+ would be to improve the formulation of the problem.
605
+ sym_pos : bool
606
+ Leave ``True`` if the problem is expected to yield a well conditioned
607
+ symmetric positive definite normal equation matrix (almost always).
608
+ cholesky : bool
609
+ Set to ``True`` if the normal equations are to be solved by explicit
610
+ Cholesky decomposition followed by explicit forward/backward
611
+ substitution. This is typically faster for moderate, dense problems
612
+ that are numerically well-behaved.
613
+ pc : bool
614
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
615
+ used. This is almost always (if not always) beneficial.
616
+ ip : bool
617
+ Set to ``True`` if the improved initial point suggestion due to [4]_
618
+ Section 4.3 is desired. It's unclear whether this is beneficial.
619
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
620
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
621
+ True``.) A matrix is factorized in each iteration of the algorithm.
622
+ This option specifies how to permute the columns of the matrix for
623
+ sparsity preservation. Acceptable values are:
624
+
625
+ - ``NATURAL``: natural ordering.
626
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
627
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
628
+ - ``COLAMD``: approximate minimum degree column ordering.
629
+
630
+ This option can impact the convergence of the
631
+ interior point algorithm; test different values to determine which
632
+ performs best for your problem. For more information, refer to
633
+ ``scipy.sparse.linalg.splu``.
634
+ callback : callable, optional
635
+ If a callback function is provided, it will be called within each
636
+ iteration of the algorithm. The callback function must accept a single
637
+ `scipy.optimize.OptimizeResult` consisting of the following fields:
638
+
639
+ x : 1-D array
640
+ Current solution vector
641
+ fun : float
642
+ Current value of the objective function
643
+ success : bool
644
+ True only when an algorithm has completed successfully,
645
+ so this is always False as the callback function is called
646
+ only while the algorithm is still iterating.
647
+ slack : 1-D array
648
+ The values of the slack variables. Each slack variable
649
+ corresponds to an inequality constraint. If the slack is zero,
650
+ the corresponding constraint is active.
651
+ con : 1-D array
652
+ The (nominally zero) residuals of the equality constraints,
653
+ that is, ``b - A_eq @ x``
654
+ phase : int
655
+ The phase of the algorithm being executed. This is always
656
+ 1 for the interior-point method because it has only one phase.
657
+ status : int
658
+ For the interior-point method, this is always 0 because if a
659
+ different status is detected, the algorithm terminates.
660
+ nit : int
661
+ The number of iterations performed.
662
+ message : str
663
+ A string descriptor of the exit status of the optimization.
664
+ postsolve_args : tuple
665
+ Data needed by _postsolve to convert the solution to the standard-form
666
+ problem into the solution to the original problem.
667
+
668
+ Returns
669
+ -------
670
+ x_hat : float
671
+ Solution vector (for standard form problem).
672
+ status : int
673
+ An integer representing the exit status of the optimization::
674
+
675
+ 0 : Optimization terminated successfully
676
+ 1 : Iteration limit reached
677
+ 2 : Problem appears to be infeasible
678
+ 3 : Problem appears to be unbounded
679
+ 4 : Serious numerical difficulties encountered
680
+
681
+ message : str
682
+ A string descriptor of the exit status of the optimization.
683
+ iteration : int
684
+ The number of iterations taken to solve the problem
685
+
686
+ References
687
+ ----------
688
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
689
+ optimizer for linear programming: an implementation of the
690
+ homogeneous algorithm." High performance optimization. Springer US,
691
+ 2000. 197-232.
692
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
693
+ Programming based on Newton's Method." Unpublished Course Notes,
694
+ March 2004. Available 2/25/2017 at:
695
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
696
+
697
+ """
698
+
699
+ iteration = 0
700
+
701
+ # default initial point
702
+ x, y, z, tau, kappa = _get_blind_start(A.shape)
703
+
704
+ # first iteration is special improvement of initial point
705
+ ip = ip if pc else False
706
+
707
+ # [4] 4.5
708
+ rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
709
+ A, b, c, c0, x, y, z, tau, kappa)
710
+ go = rho_p > tol or rho_d > tol or rho_A > tol # we might get lucky : )
711
+
712
+ if disp:
713
+ _display_iter(rho_p, rho_d, rho_g, "-", rho_mu, obj, header=True)
714
+ if callback is not None:
715
+ x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
716
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
717
+ 'con': con, 'nit': iteration, 'phase': 1,
718
+ 'complete': False, 'status': 0,
719
+ 'message': "", 'success': False})
720
+ callback(res)
721
+
722
+ status = 0
723
+ message = "Optimization terminated successfully."
724
+
725
+ if sparse:
726
+ A = sps.csc_matrix(A)
727
+
728
+ while go:
729
+
730
+ iteration += 1
731
+
732
+ if ip: # initial point
733
+ # [4] Section 4.4
734
+ gamma = 1
735
+
736
+ def eta(g):
737
+ return 1
738
+ else:
739
+ # gamma = 0 in predictor step according to [4] 4.1
740
+ # if predictor/corrector is off, use mean of complementarity [6]
741
+ # 5.1 / [4] Below Figure 10-4
742
+ gamma = 0 if pc else beta * np.mean(z * x)
743
+ # [4] Section 4.1
744
+
745
+ def eta(g=gamma):
746
+ return 1 - g
747
+
748
+ try:
749
+ # Solve [4] 8.6 and 8.7/8.13/8.23
750
+ d_x, d_y, d_z, d_tau, d_kappa = _get_delta(
751
+ A, b, c, x, y, z, tau, kappa, gamma, eta,
752
+ sparse, lstsq, sym_pos, cholesky, pc, ip, permc_spec)
753
+
754
+ if ip: # initial point
755
+ # [4] 4.4
756
+ # Formula after 8.23 takes a full step regardless of whether
757
+ # it makes components negative
758
+ alpha = 1.0
759
+ x, y, z, tau, kappa = _do_step(
760
+ x, y, z, tau, kappa, d_x, d_y,
761
+ d_z, d_tau, d_kappa, alpha)
762
+ x[x < 1] = 1
763
+ z[z < 1] = 1
764
+ tau = max(1, tau)
765
+ kappa = max(1, kappa)
766
+ ip = False # done with initial point
767
+ else:
768
+ # [4] Section 4.3
769
+ alpha = _get_step(x, d_x, z, d_z, tau,
770
+ d_tau, kappa, d_kappa, alpha0)
771
+ # [4] Equation 8.9
772
+ x, y, z, tau, kappa = _do_step(
773
+ x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha)
774
+
775
+ except (LinAlgError, FloatingPointError,
776
+ ValueError, ZeroDivisionError):
777
+ # this can happen when sparse solver is used and presolve
778
+ # is turned off. Also observed ValueError in AppVeyor Python 3.6
779
+ # Win32 build (PR #8676). I've never seen it otherwise.
780
+ status = 4
781
+ message = _get_message(status)
782
+ break
783
+
784
+ # [4] 4.5
785
+ rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
786
+ A, b, c, c0, x, y, z, tau, kappa)
787
+ go = rho_p > tol or rho_d > tol or rho_A > tol
788
+
789
+ if disp:
790
+ _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj)
791
+ if callback is not None:
792
+ x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
793
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
794
+ 'con': con, 'nit': iteration, 'phase': 1,
795
+ 'complete': False, 'status': 0,
796
+ 'message': "", 'success': False})
797
+ callback(res)
798
+
799
+ # [4] 4.5
800
+ inf1 = (rho_p < tol and rho_d < tol and rho_g < tol and tau < tol *
801
+ max(1, kappa))
802
+ inf2 = rho_mu < tol and tau < tol * min(1, kappa)
803
+ if inf1 or inf2:
804
+ # [4] Lemma 8.4 / Theorem 8.3
805
+ if b.transpose().dot(y) > tol:
806
+ status = 2
807
+ else: # elif c.T.dot(x) < tol: ? Probably not necessary.
808
+ status = 3
809
+ message = _get_message(status)
810
+ break
811
+ elif iteration >= maxiter:
812
+ status = 1
813
+ message = _get_message(status)
814
+ break
815
+
816
+ x_hat = x / tau
817
+ # [4] Statement after Theorem 8.2
818
+ return x_hat, status, message, iteration
819
+
820
+
821
+ def _linprog_ip(c, c0, A, b, callback, postsolve_args, maxiter=1000, tol=1e-8,
822
+ disp=False, alpha0=.99995, beta=0.1, sparse=False, lstsq=False,
823
+ sym_pos=True, cholesky=None, pc=True, ip=False,
824
+ permc_spec='MMD_AT_PLUS_A', **unknown_options):
825
+ r"""
826
+ Minimize a linear objective function subject to linear
827
+ equality and non-negativity constraints using the interior point method
828
+ of [4]_. Linear programming is intended to solve problems
829
+ of the following form:
830
+
831
+ Minimize::
832
+
833
+ c @ x
834
+
835
+ Subject to::
836
+
837
+ A @ x == b
838
+ x >= 0
839
+
840
+ User-facing documentation is in _linprog_doc.py.
841
+
842
+ Parameters
843
+ ----------
844
+ c : 1-D array
845
+ Coefficients of the linear objective function to be minimized.
846
+ c0 : float
847
+ Constant term in objective function due to fixed (and eliminated)
848
+ variables. (Purely for display.)
849
+ A : 2-D array
850
+ 2-D array such that ``A @ x``, gives the values of the equality
851
+ constraints at ``x``.
852
+ b : 1-D array
853
+ 1-D array of values representing the right hand side of each equality
854
+ constraint (row) in ``A``.
855
+ callback : callable, optional
856
+ Callback function to be executed once per iteration.
857
+ postsolve_args : tuple
858
+ Data needed by _postsolve to convert the solution to the standard-form
859
+ problem into the solution to the original problem.
860
+
861
+ Options
862
+ -------
863
+ maxiter : int (default = 1000)
864
+ The maximum number of iterations of the algorithm.
865
+ tol : float (default = 1e-8)
866
+ Termination tolerance to be used for all termination criteria;
867
+ see [4]_ Section 4.5.
868
+ disp : bool (default = False)
869
+ Set to ``True`` if indicators of optimization status are to be printed
870
+ to the console each iteration.
871
+ alpha0 : float (default = 0.99995)
872
+ The maximal step size for Mehrotra's predictor-corrector search
873
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
874
+ beta : float (default = 0.1)
875
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
876
+ when Mehrota's predictor-corrector is not in use (uncommon).
877
+ sparse : bool (default = False)
878
+ Set to ``True`` if the problem is to be treated as sparse after
879
+ presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
880
+ this option will automatically be set ``True``, and the problem
881
+ will be treated as sparse even during presolve. If your constraint
882
+ matrices contain mostly zeros and the problem is not very small (less
883
+ than about 100 constraints or variables), consider setting ``True``
884
+ or providing ``A_eq`` and ``A_ub`` as sparse matrices.
885
+ lstsq : bool (default = False)
886
+ Set to ``True`` if the problem is expected to be very poorly
887
+ conditioned. This should always be left ``False`` unless severe
888
+ numerical difficulties are encountered. Leave this at the default
889
+ unless you receive a warning message suggesting otherwise.
890
+ sym_pos : bool (default = True)
891
+ Leave ``True`` if the problem is expected to yield a well conditioned
892
+ symmetric positive definite normal equation matrix
893
+ (almost always). Leave this at the default unless you receive
894
+ a warning message suggesting otherwise.
895
+ cholesky : bool (default = True)
896
+ Set to ``True`` if the normal equations are to be solved by explicit
897
+ Cholesky decomposition followed by explicit forward/backward
898
+ substitution. This is typically faster for problems
899
+ that are numerically well-behaved.
900
+ pc : bool (default = True)
901
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
902
+ used. This is almost always (if not always) beneficial.
903
+ ip : bool (default = False)
904
+ Set to ``True`` if the improved initial point suggestion due to [4]_
905
+ Section 4.3 is desired. Whether this is beneficial or not
906
+ depends on the problem.
907
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
908
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
909
+ True``, and no SuiteSparse.)
910
+ A matrix is factorized in each iteration of the algorithm.
911
+ This option specifies how to permute the columns of the matrix for
912
+ sparsity preservation. Acceptable values are:
913
+
914
+ - ``NATURAL``: natural ordering.
915
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
916
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
917
+ - ``COLAMD``: approximate minimum degree column ordering.
918
+
919
+ This option can impact the convergence of the
920
+ interior point algorithm; test different values to determine which
921
+ performs best for your problem. For more information, refer to
922
+ ``scipy.sparse.linalg.splu``.
923
+ unknown_options : dict
924
+ Optional arguments not used by this particular solver. If
925
+ `unknown_options` is non-empty a warning is issued listing all
926
+ unused options.
927
+
928
+ Returns
929
+ -------
930
+ x : 1-D array
931
+ Solution vector.
932
+ status : int
933
+ An integer representing the exit status of the optimization::
934
+
935
+ 0 : Optimization terminated successfully
936
+ 1 : Iteration limit reached
937
+ 2 : Problem appears to be infeasible
938
+ 3 : Problem appears to be unbounded
939
+ 4 : Serious numerical difficulties encountered
940
+
941
+ message : str
942
+ A string descriptor of the exit status of the optimization.
943
+ iteration : int
944
+ The number of iterations taken to solve the problem.
945
+
946
+ Notes
947
+ -----
948
+ This method implements the algorithm outlined in [4]_ with ideas from [8]_
949
+ and a structure inspired by the simpler methods of [6]_.
950
+
951
+ The primal-dual path following method begins with initial 'guesses' of
952
+ the primal and dual variables of the standard form problem and iteratively
953
+ attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
954
+ problem with a gradually reduced logarithmic barrier term added to the
955
+ objective. This particular implementation uses a homogeneous self-dual
956
+ formulation, which provides certificates of infeasibility or unboundedness
957
+ where applicable.
958
+
959
+ The default initial point for the primal and dual variables is that
960
+ defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
961
+ point option ``ip=True``), an alternate (potentially improved) starting
962
+ point can be calculated according to the additional recommendations of
963
+ [4]_ Section 4.4.
964
+
965
+ A search direction is calculated using the predictor-corrector method
966
+ (single correction) proposed by Mehrota and detailed in [4]_ Section 4.1.
967
+ (A potential improvement would be to implement the method of multiple
968
+ corrections described in [4]_ Section 4.2.) In practice, this is
969
+ accomplished by solving the normal equations, [4]_ Section 5.1 Equations
970
+ 8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
971
+ 8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
972
+ solving the normal equations rather than 8.25 directly is that the
973
+ matrices involved are symmetric positive definite, so Cholesky
974
+ decomposition can be used rather than the more expensive LU factorization.
975
+
976
+ With default options, the solver used to perform the factorization depends
977
+ on third-party software availability and the conditioning of the problem.
978
+
979
+ For dense problems, solvers are tried in the following order:
980
+
981
+ 1. ``scipy.linalg.cho_factor``
982
+
983
+ 2. ``scipy.linalg.solve`` with option ``sym_pos=True``
984
+
985
+ 3. ``scipy.linalg.solve`` with option ``sym_pos=False``
986
+
987
+ 4. ``scipy.linalg.lstsq``
988
+
989
+ For sparse problems:
990
+
991
+ 1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are installed)
992
+
993
+ 2. ``scipy.sparse.linalg.factorized``
994
+ (if scikit-umfpack and SuiteSparse are installed)
995
+
996
+ 3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
997
+
998
+ 4. ``scipy.sparse.linalg.lsqr``
999
+
1000
+ If the solver fails for any reason, successively more robust (but slower)
1001
+ solvers are attempted in the order indicated. Attempting, failing, and
1002
+ re-starting factorization can be time consuming, so if the problem is
1003
+ numerically challenging, options can be set to bypass solvers that are
1004
+ failing. Setting ``cholesky=False`` skips to solver 2,
1005
+ ``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
1006
+ to solver 4 for both sparse and dense problems.
1007
+
1008
+ Potential improvements for combatting issues associated with dense
1009
+ columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
1010
+ [10]_ Section 4.1-4.2; the latter also discusses the alleviation of
1011
+ accuracy issues associated with the substitution approach to free
1012
+ variables.
1013
+
1014
+ After calculating the search direction, the maximum possible step size
1015
+ that does not activate the non-negativity constraints is calculated, and
1016
+ the smaller of this step size and unity is applied (as in [4]_ Section
1017
+ 4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
1018
+
1019
+ The new point is tested according to the termination conditions of [4]_
1020
+ Section 4.5. The same tolerance, which can be set using the ``tol`` option,
1021
+ is used for all checks. (A potential improvement would be to expose
1022
+ the different tolerances to be set independently.) If optimality,
1023
+ unboundedness, or infeasibility is detected, the solve procedure
1024
+ terminates; otherwise it repeats.
1025
+
1026
+ The expected problem formulation differs between the top level ``linprog``
1027
+ module and the method-specific solvers. The method-specific solvers expect a
1028
+ problem in standard form:
1029
+
1030
+ Minimize::
1031
+
1032
+ c @ x
1033
+
1034
+ Subject to::
1035
+
1036
+ A @ x == b
1037
+ x >= 0
1038
+
1039
+ Whereas the top level ``linprog`` module expects a problem of form:
1040
+
1041
+ Minimize::
1042
+
1043
+ c @ x
1044
+
1045
+ Subject to::
1046
+
1047
+ A_ub @ x <= b_ub
1048
+ A_eq @ x == b_eq
1049
+ lb <= x <= ub
1050
+
1051
+ where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
1052
+
1053
+ The original problem contains equality, upper-bound and variable constraints
1054
+ whereas the method-specific solver requires equality constraints and
1055
+ variable non-negativity.
1056
+
1057
+ The ``linprog`` module converts the original problem to standard form by
1058
+ converting the simple bounds to upper bound constraints, introducing
1059
+ non-negative slack variables for inequality constraints, and expressing
1060
+ unbounded variables as the difference between two non-negative variables.
1061
+
1062
+
1063
+ References
1064
+ ----------
1065
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
1066
+ optimizer for linear programming: an implementation of the
1067
+ homogeneous algorithm." High performance optimization. Springer US,
1068
+ 2000. 197-232.
1069
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
1070
+ Programming based on Newton's Method." Unpublished Course Notes,
1071
+ March 2004. Available 2/25/2017 at
1072
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
1073
+ .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
1074
+ programming." Mathematical Programming 71.2 (1995): 221-245.
1075
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1076
+ programming." Athena Scientific 1 (1997): 997.
1077
+ .. [10] Andersen, Erling D., et al. Implementation of interior point methods
1078
+ for large scale linear programming. HEC/Universite de Geneve, 1996.
1079
+
1080
+ """
1081
+
1082
+ _check_unknown_options(unknown_options)
1083
+
1084
+ # These should be warnings, not errors
1085
+ if (cholesky or cholesky is None) and sparse and not has_cholmod:
1086
+ if cholesky:
1087
+ warn("Sparse cholesky is only available with scikit-sparse. "
1088
+ "Setting `cholesky = False`",
1089
+ OptimizeWarning, stacklevel=3)
1090
+ cholesky = False
1091
+
1092
+ if sparse and lstsq:
1093
+ warn("Option combination 'sparse':True and 'lstsq':True "
1094
+ "is not recommended.",
1095
+ OptimizeWarning, stacklevel=3)
1096
+
1097
+ if lstsq and cholesky:
1098
+ warn("Invalid option combination 'lstsq':True "
1099
+ "and 'cholesky':True; option 'cholesky' has no effect when "
1100
+ "'lstsq' is set True.",
1101
+ OptimizeWarning, stacklevel=3)
1102
+
1103
+ valid_permc_spec = ('NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', 'COLAMD')
1104
+ if permc_spec.upper() not in valid_permc_spec:
1105
+ warn("Invalid permc_spec option: '" + str(permc_spec) + "'. "
1106
+ "Acceptable values are 'NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', "
1107
+ "and 'COLAMD'. Reverting to default.",
1108
+ OptimizeWarning, stacklevel=3)
1109
+ permc_spec = 'MMD_AT_PLUS_A'
1110
+
1111
+ # This can be an error
1112
+ if not sym_pos and cholesky:
1113
+ raise ValueError(
1114
+ "Invalid option combination 'sym_pos':False "
1115
+ "and 'cholesky':True: Cholesky decomposition is only possible "
1116
+ "for symmetric positive definite matrices.")
1117
+
1118
+ cholesky = cholesky or (cholesky is None and sym_pos and not lstsq)
1119
+
1120
+ x, status, message, iteration = _ip_hsd(A, b, c, c0, alpha0, beta,
1121
+ maxiter, disp, tol, sparse,
1122
+ lstsq, sym_pos, cholesky,
1123
+ pc, ip, permc_spec, callback,
1124
+ postsolve_args)
1125
+
1126
+ return x, status, message, iteration
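# --- Editor's sketch (not part of the SciPy source): this private solver is
# normally reached through the public interface, which performs the
# bounds -> slack -> standard-form conversion described in the Notes above.
# 'interior-point' is the method name under which this code shipped; later
# SciPy releases deprecated it in favor of 'highs'.
from scipy.optimize import linprog

c = [-1, 4]
A_ub = [[-3, 1], [1, 2]]
b_ub = [6, 4]
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(None, None), (-3, None)],
              method='interior-point')
print(res.status, res.x)   # 0 and approximately [10., -3.]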
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py ADDED
@@ -0,0 +1,572 @@
1
+ """Revised simplex method for linear programming
2
+
3
+ The *revised simplex* method uses the method described in [1]_, except
4
+ that a factorization [2]_ of the basis matrix, rather than its inverse,
5
+ is efficiently maintained and used to solve the linear systems at each
6
+ iteration of the algorithm.
7
+
8
+ .. versionadded:: 1.3.0
9
+
10
+ References
11
+ ----------
12
+ .. [1] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
13
+ programming." Athena Scientific 1 (1997): 997.
14
+ .. [2] Bartels, Richard H. "A stabilization of the simplex method."
15
+ Journal in Numerische Mathematik 16.5 (1971): 414-434.
16
+
17
+ """
18
+ # Author: Matt Haberland
19
+
20
+ import numpy as np
21
+ from numpy.linalg import LinAlgError
22
+
23
+ from scipy.linalg import solve
24
+ from ._optimize import _check_unknown_options
25
+ from ._bglu_dense import LU
26
+ from ._bglu_dense import BGLU as BGLU
27
+ from ._linprog_util import _postsolve
28
+ from ._optimize import OptimizeResult
29
+
30
+
31
+ def _phase_one(A, b, x0, callback, postsolve_args, maxiter, tol, disp,
32
+ maxupdate, mast, pivot):
33
+ """
34
+ The purpose of phase one is to find an initial basic feasible solution
35
+ (BFS) to the original problem.
36
+
37
+ Generates an auxiliary problem with a trivial BFS and an objective that
38
+ minimizes infeasibility of the original problem. Solves the auxiliary
39
+ problem using the main simplex routine (phase two). This either yields
40
+ a BFS to the original problem or determines that the original problem is
41
+ infeasible. If feasible, phase one detects redundant rows in the original
42
+ constraint matrix and removes them, then chooses additional indices as
43
+ necessary to complete a basis/BFS for the original problem.
44
+ """
45
+
46
+ m, n = A.shape
47
+ status = 0
48
+
49
+ # generate auxiliary problem to get initial BFS
50
+ A, b, c, basis, x, status = _generate_auxiliary_problem(A, b, x0, tol)
51
+
52
+ if status == 6:
53
+ residual = c.dot(x)
54
+ iter_k = 0
55
+ return x, basis, A, b, residual, status, iter_k
56
+
57
+ # solve auxiliary problem
58
+ phase_one_n = n
59
+ iter_k = 0
60
+ x, basis, status, iter_k = _phase_two(c, A, x, basis, callback,
61
+ postsolve_args,
62
+ maxiter, tol, disp,
63
+ maxupdate, mast, pivot,
64
+ iter_k, phase_one_n)
65
+
66
+ # check for infeasibility
67
+ residual = c.dot(x)
68
+ if status == 0 and residual > tol:
69
+ status = 2
70
+
71
+ # drive artificial variables out of basis
72
+ # TODO: test redundant row removal better
73
+ # TODO: make solve more efficient with BGLU? This could take a while.
74
+ keep_rows = np.ones(m, dtype=bool)
75
+ for basis_column in basis[basis >= n]:
76
+ B = A[:, basis]
77
+ try:
78
+ basis_finder = np.abs(solve(B, A)) # inefficient
79
+ pertinent_row = np.argmax(basis_finder[:, basis_column])
80
+ eligible_columns = np.ones(n, dtype=bool)
81
+ eligible_columns[basis[basis < n]] = 0
82
+ eligible_column_indices = np.where(eligible_columns)[0]
83
+ index = np.argmax(basis_finder[:, :n]
84
+ [pertinent_row, eligible_columns])
85
+ new_basis_column = eligible_column_indices[index]
86
+ if basis_finder[pertinent_row, new_basis_column] < tol:
87
+ keep_rows[pertinent_row] = False
88
+ else:
89
+ basis[basis == basis_column] = new_basis_column
90
+ except LinAlgError:
91
+ status = 4
92
+
93
+ # form solution to original problem
94
+ A = A[keep_rows, :n]
95
+ basis = basis[keep_rows]
96
+ x = x[:n]
97
+ m = A.shape[0]
98
+ return x, basis, A, b, residual, status, iter_k
99
+
100
+
101
+ def _get_more_basis_columns(A, basis):
102
+ """
103
+ Called when the auxiliary problem terminates with artificial columns in
104
+ the basis, which must be removed and replaced with non-artificial
105
+ columns. Finds additional columns that do not make the matrix singular.
106
+ """
107
+ m, n = A.shape
108
+
109
+ # options for inclusion are those that aren't already in the basis
110
+ a = np.arange(m+n)
111
+ bl = np.zeros(len(a), dtype=bool)
112
+ bl[basis] = 1
113
+ options = a[~bl]
114
+ options = options[options < n] # and they have to be non-artificial
115
+
116
+ # form basis matrix
117
+ B = np.zeros((m, m))
118
+ B[:, 0:len(basis)] = A[:, basis]
119
+
120
+ if (basis.size > 0 and
121
+ np.linalg.matrix_rank(B[:, :len(basis)]) < len(basis)):
122
+ raise Exception("Basis has dependent columns")
123
+
124
+ rank = 0 # just enter the loop
125
+ for i in range(n): # somewhat arbitrary, but we need another way out
126
+ # permute the options, and take as many as needed
127
+ new_basis = np.random.permutation(options)[:m-len(basis)]
128
+ B[:, len(basis):] = A[:, new_basis] # update the basis matrix
129
+ rank = np.linalg.matrix_rank(B) # check the rank
130
+ if rank == m:
131
+ break
132
+
133
+ return np.concatenate((basis, new_basis))
134
+
135
+
136
+ def _generate_auxiliary_problem(A, b, x0, tol):
137
+ """
138
+ Modifies original problem to create an auxiliary problem with a trivial
139
+ initial basic feasible solution and an objective that minimizes
140
+ infeasibility in the original problem.
141
+
142
+ Conceptually, this is done by stacking an identity matrix on the right of
143
+ the original constraint matrix, adding artificial variables to correspond
144
+ with each of these new columns, and generating a cost vector that is all
145
+ zeros except for ones corresponding with each of the new variables.
146
+
147
+ A initial basic feasible solution is trivial: all variables are zero
148
+ except for the artificial variables, which are set equal to the
149
+ corresponding element of the right hand side `b`.
150
+
151
+ Running the simplex method on this auxiliary problem drives all of the
152
+ artificial variables - and thus the cost - to zero if the original problem
153
+ is feasible. The original problem is declared infeasible otherwise.
154
+
155
+ Much of the complexity below is to improve efficiency by using singleton
156
+ columns in the original problem where possible, thus generating artificial
157
+ variables only as necessary, and using an initial 'guess' basic feasible
158
+ solution.
159
+ """
160
+ status = 0
161
+ m, n = A.shape
162
+
163
+ if x0 is not None:
164
+ x = x0
165
+ else:
166
+ x = np.zeros(n)
167
+
168
+ r = b - A@x # residual; this must be all zeros for feasibility
169
+
170
+ A[r < 0] = -A[r < 0] # express problem with RHS positive for trivial BFS
171
+ b[r < 0] = -b[r < 0] # to the auxiliary problem
172
+ r[r < 0] *= -1
173
+
174
+ # Rows which we will need to find a trivial way to zero.
175
+ # This should just be the rows where there is a nonzero residual.
176
+ # But then we would not necessarily have a column singleton in every row.
177
+ # This makes it difficult to find an initial basis.
178
+ if x0 is None:
179
+ nonzero_constraints = np.arange(m)
180
+ else:
181
+ nonzero_constraints = np.where(r > tol)[0]
182
+
183
+ # these are (at least some of) the initial basis columns
184
+ basis = np.where(np.abs(x) > tol)[0]
185
+
186
+ if len(nonzero_constraints) == 0 and len(basis) <= m: # already a BFS
187
+ c = np.zeros(n)
188
+ basis = _get_more_basis_columns(A, basis)
189
+ return A, b, c, basis, x, status
190
+ elif (len(nonzero_constraints) > m - len(basis) or
191
+ np.any(x < 0)): # can't get trivial BFS
192
+ c = np.zeros(n)
193
+ status = 6
194
+ return A, b, c, basis, x, status
195
+
196
+ # chooses existing columns appropriate for inclusion in initial basis
197
+ cols, rows = _select_singleton_columns(A, r)
198
+
199
+ # find the rows we need to zero that we _can_ zero with column singletons
200
+ i_tofix = np.isin(rows, nonzero_constraints)
201
+ # these columns can't already be in the basis, though
202
+ # we are going to add them to the basis and change the corresponding x val
203
+ i_notinbasis = np.logical_not(np.isin(cols, basis))
204
+ i_fix_without_aux = np.logical_and(i_tofix, i_notinbasis)
205
+ rows = rows[i_fix_without_aux]
206
+ cols = cols[i_fix_without_aux]
207
+
208
+ # indices of the rows we can only zero with auxiliary variable
209
+ # these rows will get a one in each auxiliary column
210
+ arows = nonzero_constraints[np.logical_not(
211
+ np.isin(nonzero_constraints, rows))]
212
+ n_aux = len(arows)
213
+ acols = n + np.arange(n_aux) # indices of auxiliary columns
214
+
215
+ basis_ng = np.concatenate((cols, acols)) # basis columns not from guess
216
+ basis_ng_rows = np.concatenate((rows, arows)) # rows we need to zero
217
+
218
+ # add auxiliary singleton columns
219
+ A = np.hstack((A, np.zeros((m, n_aux))))
220
+ A[arows, acols] = 1
221
+
222
+ # generate initial BFS
223
+ x = np.concatenate((x, np.zeros(n_aux)))
224
+ x[basis_ng] = r[basis_ng_rows]/A[basis_ng_rows, basis_ng]
225
+
226
+ # generate costs to minimize infeasibility
227
+ c = np.zeros(n_aux + n)
228
+ c[acols] = 1
229
+
230
+ # basis columns correspond with nonzeros in guess, those with column
231
+ # singletons we used to zero remaining constraints, and any additional
232
+ # columns to get a full set (m columns)
233
+ basis = np.concatenate((basis, basis_ng))
234
+ basis = _get_more_basis_columns(A, basis) # add columns as needed
235
+
236
+ return A, b, c, basis, x, status
237
+
238
+
239
+ def _select_singleton_columns(A, b):
240
+ """
241
+ Finds singleton columns for which the singleton entry is of the same sign
242
+ as the right-hand side; these columns are eligible for inclusion in an
243
+ initial basis. Determines the rows in which the singleton entries are
244
+ located. For each of these rows, returns the indices of the one singleton
245
+ column and its corresponding row.
246
+ """
247
+ # find indices of all singleton columns and corresponding row indices
248
+ column_indices = np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
249
+ columns = A[:, column_indices] # array of singleton columns
250
+ row_indices = np.zeros(len(column_indices), dtype=int)
251
+ nonzero_rows, nonzero_columns = np.nonzero(columns)
252
+ row_indices[nonzero_columns] = nonzero_rows # corresponding row indices
253
+
254
+ # keep only singletons with entries that have same sign as RHS
255
+ # this is necessary because all elements of BFS must be non-negative
256
+ same_sign = A[row_indices, column_indices]*b[row_indices] >= 0
257
+ column_indices = column_indices[same_sign][::-1]
258
+ row_indices = row_indices[same_sign][::-1]
259
+ # Reversing the order so that steps below select rightmost columns
260
+ # for initial basis, which will tend to be slack variables. (If the
261
+ # guess corresponds with a basic feasible solution but a constraint
262
+ # is not satisfied with the corresponding slack variable zero, the slack
263
+ # variable must be basic.)
264
+
265
+ # for each row, keep rightmost singleton column with an entry in that row
266
+ unique_row_indices, first_columns = np.unique(row_indices,
267
+ return_index=True)
268
+ return column_indices[first_columns], unique_row_indices
269
+
270
+
271
+ def _find_nonzero_rows(A, tol):
272
+ """
273
+ Returns logical array indicating the locations of rows with at least
274
+ one nonzero element.
275
+ """
276
+ return np.any(np.abs(A) > tol, axis=1)
277
+
278
+
279
+ def _select_enter_pivot(c_hat, bl, a, rule="bland", tol=1e-12):
280
+ """
281
+ Selects a pivot to enter the basis. Currently Bland's rule - the smallest
282
+ index that has a negative reduced cost - is the default.
283
+ """
284
+ if rule.lower() == "mrc": # index with minimum reduced cost
285
+ return a[~bl][np.argmin(c_hat)]
286
+ else: # smallest index w/ negative reduced cost
287
+ return a[~bl][c_hat < -tol][0]
288
+
289
+
290
+ def _display_iter(phase, iteration, slack, con, fun):
291
+ """
292
+ Print indicators of optimization status to the console.
293
+ """
294
+ header = True if not iteration % 20 else False
295
+
296
+ if header:
297
+ print("Phase",
298
+ "Iteration",
299
+ "Minimum Slack ",
300
+ "Constraint Residual",
301
+ "Objective ")
302
+
303
+ # :<X.Y left aligns Y digits in X digit spaces
304
+ fmt = '{0:<6}{1:<10}{2:<20.13}{3:<20.13}{4:<20.13}'
305
+ try:
306
+ slack = np.min(slack)
307
+ except ValueError:
308
+ slack = "NA"
309
+ print(fmt.format(phase, iteration, slack, np.linalg.norm(con), fun))
310
+
311
+
312
+ def _display_and_callback(phase_one_n, x, postsolve_args, status,
313
+ iteration, disp, callback):
314
+ if phase_one_n is not None:
315
+ phase = 1
316
+ x_postsolve = x[:phase_one_n]
317
+ else:
318
+ phase = 2
319
+ x_postsolve = x
320
+ x_o, fun, slack, con = _postsolve(x_postsolve,
321
+ postsolve_args)
322
+
323
+ if callback is not None:
324
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
325
+ 'con': con, 'nit': iteration,
326
+ 'phase': phase, 'complete': False,
327
+ 'status': status, 'message': "",
328
+ 'success': False})
329
+ callback(res)
330
+ if disp:
331
+ _display_iter(phase, iteration, slack, con, fun)
332
+
333
+
334
+ def _phase_two(c, A, x, b, callback, postsolve_args, maxiter, tol, disp,
335
+ maxupdate, mast, pivot, iteration=0, phase_one_n=None):
336
+ """
337
+ The heart of the simplex method. Beginning with a basic feasible solution,
338
+ moves to adjacent basic feasible solutions successively lower reduced cost.
339
+ Terminates when there are no basic feasible solutions with lower reduced
340
+ cost or if the problem is determined to be unbounded.
341
+
342
+ This implementation follows the revised simplex method based on LU
343
+ decomposition. Rather than maintaining a tableau or an inverse of the
344
+ basis matrix, we keep a factorization of the basis matrix that allows
345
+ efficient solution of linear systems while avoiding stability issues
346
+ associated with inverted matrices.
347
+ """
348
+ m, n = A.shape
349
+ status = 0
350
+ a = np.arange(n) # indices of columns of A
351
+ ab = np.arange(m) # indices of columns of B
352
+ if maxupdate:
353
+ # basis matrix factorization object; similar to B = A[:, b]
354
+ B = BGLU(A, b, maxupdate, mast)
355
+ else:
356
+ B = LU(A, b)
357
+
358
+ for iteration in range(iteration, maxiter):
359
+
360
+ if disp or callback is not None:
361
+ _display_and_callback(phase_one_n, x, postsolve_args, status,
362
+ iteration, disp, callback)
363
+
364
+ bl = np.zeros(len(a), dtype=bool)
365
+ bl[b] = 1
366
+
367
+ xb = x[b] # basic variables
368
+ cb = c[b] # basic costs
369
+
370
+ try:
371
+ v = B.solve(cb, transposed=True) # similar to v = solve(B.T, cb)
372
+ except LinAlgError:
373
+ status = 4
374
+ break
375
+
376
+ # TODO: cythonize?
377
+ c_hat = c - v.dot(A) # reduced cost
378
+ c_hat = c_hat[~bl]
379
+ # Above is much faster than:
380
+ # N = A[:, ~bl] # slow!
381
+ # c_hat = c[~bl] - v.T.dot(N)
382
+ # Can we perform the multiplication only on the nonbasic columns?
383
+
384
+ if np.all(c_hat >= -tol): # all reduced costs positive -> terminate
385
+ break
386
+
387
+ j = _select_enter_pivot(c_hat, bl, a, rule=pivot, tol=tol)
388
+ u = B.solve(A[:, j]) # similar to u = solve(B, A[:, j])
389
+
390
+ i = u > tol # if none of the u are positive, unbounded
391
+ if not np.any(i):
392
+ status = 3
393
+ break
394
+
395
+ th = xb[i]/u[i]
396
+ l = np.argmin(th) # implicitly selects smallest subscript
397
+ th_star = th[l] # step size
398
+
399
+ x[b] = x[b] - th_star*u # take step
400
+ x[j] = th_star
401
+ B.update(ab[i][l], j) # modify basis
402
+ b = B.b # similar to b[ab[i][l]] =
403
+
404
+ else:
405
+ # If the end of the for loop is reached (without a break statement),
406
+ # then another step has been taken, so the iteration counter should
407
+ # increment, info should be displayed, and callback should be called.
408
+ iteration += 1
409
+ status = 1
410
+ if disp or callback is not None:
411
+ _display_and_callback(phase_one_n, x, postsolve_args, status,
412
+ iteration, disp, callback)
413
+
414
+ return x, b, status, iteration
415
+
416
+
417
+ def _linprog_rs(c, c0, A, b, x0, callback, postsolve_args,
+                 maxiter=5000, tol=1e-12, disp=False,
+                 maxupdate=10, mast=False, pivot="mrc",
+                 **unknown_options):
+     """
+     Solve the following linear programming problem via a two-phase
+     revised simplex algorithm::
+
+         minimize:     c @ x
+
+         subject to:   A @ x == b
+                       0 <= x < oo
+
+     User-facing documentation is in _linprog_doc.py.
+
+     Parameters
+     ----------
+     c : 1-D array
+         Coefficients of the linear objective function to be minimized.
+     c0 : float
+         Constant term in objective function due to fixed (and eliminated)
+         variables. (Currently unused.)
+     A : 2-D array
+         2-D array which, when matrix-multiplied by ``x``, gives the values of
+         the equality constraints at ``x``.
+     b : 1-D array
+         1-D array of values representing the RHS of each equality constraint
+         (row) in ``A``.
+     x0 : 1-D array, optional
+         Starting values of the independent variables, which will be refined by
+         the optimization algorithm. For the revised simplex method, these must
+         correspond with a basic feasible solution.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function ``c @ x``.
+             success : bool
+                 True only when an algorithm has completed successfully,
+                 so this is always False as the callback function is called
+                 only while the algorithm is still iterating.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the algorithm being executed.
+             status : int
+                 For revised simplex, this is always 0 because if a different
+                 status is detected, the algorithm terminates.
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     disp : bool
+         Set to ``True`` if indicators of optimization status are to be printed
+         to the console each iteration.
+     maxupdate : int
+         The maximum number of updates performed on the LU factorization.
+         Once this limit is reached, the basis matrix is factorized from
+         scratch.
+     mast : bool
+         Minimize Amortized Solve Time. If enabled, the average time to solve
+         a linear system using the basis factorization is measured. Typically,
+         the average solve time will decrease with each successive solve after
+         initial factorization, as factorization takes much more time than the
+         solve operation (and updates). Eventually, however, the updated
+         factorization becomes sufficiently complex that the average solve time
+         begins to increase. When this is detected, the basis is refactorized
+         from scratch. Enable this option to maximize speed at the risk of
+         nondeterministic behavior. Ignored if ``maxupdate`` is 0.
+     pivot : "mrc" or "bland"
+         Pivot rule: Minimum Reduced Cost (default) or Bland's rule. Choose
+         Bland's rule if the iteration limit is reached and cycling is
+         suspected.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         `unknown_options` is non-empty a warning is issued listing all
+         unused options.
+
+     Returns
+     -------
+     x : 1-D array
+         Solution vector.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Numerical difficulties encountered
+              5 : No constraints; turn presolve on
+              6 : Guess x0 cannot be converted to a basic feasible solution
+
+     message : str
+         A string descriptor of the exit status of the optimization.
+     iteration : int
+         The number of iterations taken to solve the problem.
+     """
+
+     _check_unknown_options(unknown_options)
+
+     messages = ["Optimization terminated successfully.",
+                 "Iteration limit reached.",
+                 "The problem appears infeasible, as the phase one auxiliary "
+                 "problem terminated successfully with a residual of {0:.1e}, "
+                 "greater than the tolerance {1} required for the solution to "
+                 "be considered feasible. Consider increasing the tolerance to "
+                 "be greater than {0:.1e}. If this tolerance is unacceptably "
+                 "large, the problem is likely infeasible.",
+                 "The problem is unbounded, as the simplex algorithm found "
+                 "a basic feasible solution from which there is a direction "
+                 "with negative reduced cost in which all decision variables "
+                 "increase.",
+                 "Numerical difficulties encountered; consider trying "
+                 "method='interior-point'.",
+                 "Problems with no constraints are trivially solved; please "
+                 "turn presolve on.",
+                 "The guess x0 cannot be converted to a basic feasible "
+                 "solution. "
+                 ]
+
+     if A.size == 0:  # address test_unbounded_below_no_presolve_corrected
+         return np.zeros(c.shape), 5, messages[5], 0
+
+     x, basis, A, b, residual, status, iteration = (
+         _phase_one(A, b, x0, callback, postsolve_args,
+                    maxiter, tol, disp, maxupdate, mast, pivot))
+
+     if status == 0:
+         x, basis, status, iteration = _phase_two(c, A, x, basis, callback,
+                                                  postsolve_args,
+                                                  maxiter, tol, disp,
+                                                  maxupdate, mast, pivot,
+                                                  iteration)
+
+     return x, status, messages[status].format(residual, tol), iteration
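For context, this solver is reached from the public API roughly as below. This is a sketch, not a guaranteed invocation: ``method='revised simplex'`` was deprecated in SciPy 1.9 and removed in 1.11, so it assumes a release that still ships it, and the problem data are made up.

    from scipy.optimize import linprog

    # minimize -x - 2y  subject to  x + y <= 4,  x + 3y <= 6,  x, y >= 0
    res = linprog(c=[-1, -2], A_ub=[[1, 1], [1, 3]], b_ub=[4, 6],
                  method='revised simplex',
                  options={'maxupdate': 10, 'pivot': 'mrc'})
    print(res.x, res.fun)   # expected: [3. 1.] -5.0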
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py ADDED
@@ -0,0 +1,661 @@
+ """Simplex method for linear programming
+
+ The *simplex* method uses a traditional, full-tableau implementation of
+ Dantzig's simplex algorithm [1]_, [2]_ (*not* the Nelder-Mead simplex).
+ This algorithm is included for backwards compatibility and educational
+ purposes.
+
+ .. versionadded:: 0.15.0
+
+ Warnings
+ --------
+
+ The simplex method may encounter numerical difficulties when pivot
+ values are close to the specified tolerance. If encountered, try
+ removing any redundant constraints, changing the pivot strategy to
+ Bland's rule, or increasing the tolerance value.
+
+ Alternatively, more robust methods may be used. See
+ :ref:`'interior-point' <optimize.linprog-interior-point>` and
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`.
+
+ References
+ ----------
+ .. [1] Dantzig, George B., Linear programming and extensions. Rand
+        Corporation Research Study Princeton Univ. Press, Princeton, NJ,
+        1963
+ .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+        Mathematical Programming", McGraw-Hill, Chapter 4.
+ """
+
+ import numpy as np
+ from warnings import warn
+ from ._optimize import OptimizeResult, OptimizeWarning, _check_unknown_options
+ from ._linprog_util import _postsolve
+
+
+ def _pivot_col(T, tol=1e-9, bland=False):
+     """
+     Given a linear programming simplex tableau, determine the column
+     of the variable to enter the basis.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     tol : float
+         Elements in the objective row larger than -tol will not be considered
+         for pivoting. Nominally this value is zero, but numerical issues
+         cause a tolerance about zero to be necessary.
+     bland : bool
+         If True, use Bland's rule for selection of the column (select the
+         first column with a negative coefficient in the objective row,
+         regardless of magnitude).
+
+     Returns
+     -------
+     status: bool
+         True if a suitable pivot column was found, otherwise False.
+         A return of False indicates that the linear programming simplex
+         algorithm is complete.
+     col: int
+         The index of the column of the pivot element.
+         If status is False, col will be returned as nan.
+     """
+     ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
+     if ma.count() == 0:
+         return False, np.nan
+     if bland:
+         # ma.mask is sometimes 0d
+         return True, np.nonzero(np.logical_not(np.atleast_1d(ma.mask)))[0][0]
+     return True, np.ma.nonzero(ma == ma.min())[0][0]
+
+
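To make the column-selection rule concrete, here is a small sketch on a hand-made Phase 2 tableau (values invented for illustration; the last row holds the reduced costs):

    import numpy as np

    T = np.array([[1., 0., 2., 4.],
                  [0., 1., 1., 2.],
                  [-2., -3., 0., 0.]])
    _pivot_col(T)               # (True, 1): most negative reduced cost
    _pivot_col(T, bland=True)   # (True, 0): first negative column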
+ def _pivot_row(T, basis, pivcol, phase, tol=1e-9, bland=False):
+     """
+     Given a linear programming simplex tableau, determine the row for the
+     pivot operation.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     basis : array
+         A list of the current basic variables.
+     pivcol : int
+         The index of the pivot column.
+     phase : int
+         The phase of the simplex algorithm (1 or 2).
+     tol : float
+         Elements in the pivot column smaller than tol will not be considered
+         for pivoting. Nominally this value is zero, but numerical issues
+         cause a tolerance about zero to be necessary.
+     bland : bool
+         If True, use Bland's rule for selection of the row (if more than one
+         row can be used, choose the one with the lowest variable index).
+
+     Returns
+     -------
+     status: bool
+         True if a suitable pivot row was found, otherwise False. A return
+         of False indicates that the linear programming problem is unbounded.
+     row: int
+         The index of the row of the pivot element. If status is False, row
+         will be returned as nan.
+     """
+     if phase == 1:
+         k = 2
+     else:
+         k = 1
+     ma = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, pivcol], copy=False)
+     if ma.count() == 0:
+         return False, np.nan
+     mb = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, -1], copy=False)
+     q = mb / ma
+     min_rows = np.ma.nonzero(q == q.min())[0]
+     if bland:
+         return True, min_rows[np.argmin(np.take(basis, min_rows))]
+     return True, min_rows[0]
+
+
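A companion sketch of the minimum-ratio row selection on the same kind of toy tableau (values invented; ``basis`` only influences the result when ``bland=True``):

    import numpy as np

    T = np.array([[2., 1., 4.],
                  [1., 3., 3.],
                  [-1., -1., 0.]])
    basis = np.array([1, 2])                 # hypothetical current basis
    _pivot_row(T, basis, pivcol=0, phase=2)  # (True, 0): ratio 4/2 < 3/1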
+ def _apply_pivot(T, basis, pivrow, pivcol, tol=1e-9):
+     """
+     Pivot the simplex tableau inplace on the element given by
+     (pivrow, pivcol). The entering variable corresponds to the column given
+     by pivcol, forcing the variable basis[pivrow] to leave the basis.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     basis : 1-D array
+         An array of the indices of the basic variables, such that basis[i]
+         contains the column corresponding to the basic variable for row i.
+         Basis is modified in place by _apply_pivot.
+     pivrow : int
+         Row index of the pivot.
+     pivcol : int
+         Column index of the pivot.
+     """
+     basis[pivrow] = pivcol
+     pivval = T[pivrow, pivcol]
+     T[pivrow] = T[pivrow] / pivval
+     for irow in range(T.shape[0]):
+         if irow != pivrow:
+             T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]
+
+     # The selected pivot should never lead to a pivot value less than the tol.
+     if np.isclose(pivval, tol, atol=0, rtol=1e4):
+         message = (
+             f"The pivot operation produces a pivot value of {pivval: .1e}, "
+             "which is only slightly greater than the specified "
+             f"tolerance {tol: .1e}. This may lead to issues regarding the "
+             "numerical stability of the simplex method. "
+             "Removing redundant constraints, changing the pivot strategy "
+             "via Bland's rule or increasing the tolerance may "
+             "help reduce the issue.")
+         warn(message, OptimizeWarning, stacklevel=5)
+
+
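The pivot itself is a Gauss-Jordan elimination step; a sketch with invented numbers:

    import numpy as np

    T = np.array([[2., 1., 4.],
                  [1., 3., 3.],
                  [-1., -1., 0.]])
    basis = np.array([1, 2])
    _apply_pivot(T, basis, pivrow=0, pivcol=0)
    # Row 0 is scaled so the pivot becomes 1 and column 0 is zeroed out
    # elsewhere: T is now [[1., .5, 2.], [0., 2.5, 1.], [0., -.5, 2.]]
    # and basis[0] == 0.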
+ def _solve_simplex(T, n, basis, callback, postsolve_args,
+                    maxiter=1000, tol=1e-9, phase=2, bland=False, nit0=0,
+                    ):
+     """
+     Solve a linear programming problem in "standard form" using the Simplex
+     Method. Linear programming is intended to solve the following problem
+     form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     n : int
+         The number of true variables in the problem.
+     basis : 1-D array
+         An array of the indices of the basic variables, such that basis[i]
+         contains the column corresponding to the basic variable for row i.
+         Basis is modified in place by _solve_simplex.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback must accept a
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function.
+             success : bool
+                 True only when a phase has completed successfully. This
+                 will be False for most iterations.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the optimization being executed. In phase 1 a
+                 basic feasible solution is sought and ``T`` has an additional
+                 row representing an alternate objective function.
+             status : int
+                 An integer representing the exit status of the optimization::
+
+                      0 : Optimization terminated successfully
+                      1 : Iteration limit reached
+                      2 : Problem appears to be infeasible
+                      3 : Problem appears to be unbounded
+                      4 : Serious numerical difficulties encountered
+
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+     maxiter : int
+         The maximum number of iterations to perform before aborting the
+         optimization.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     phase : int
+         The phase of the optimization being executed. In phase 1 a basic
+         feasible solution is sought and ``T`` has an additional row
+         representing an alternate objective function.
+     bland : bool
+         If True, choose pivots using Bland's rule [3]_. In problems which
+         fail to converge due to cycling, using Bland's rule can provide
+         convergence at the expense of a less optimal path about the simplex.
+     nit0 : int
+         The initial iteration number used to keep an accurate iteration total
+         in a two-phase problem.
+
+     Returns
+     -------
+     nit : int
+         The number of iterations. Used to keep an accurate iteration total
+         in the two-phase problem.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Serious numerical difficulties encountered
+
+     """
+     nit = nit0
+     status = 0
+     message = ''
+     complete = False
+
+     if phase == 1:
+         m = T.shape[1]-2
+     elif phase == 2:
+         m = T.shape[1]-1
+     else:
+         raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")
+
+     if phase == 2:
+         # Check if any artificial variables are still in the basis.
+         # If yes, check if any coefficients from this row and a column
+         # corresponding to one of the non-artificial variables is non-zero.
+         # If found, pivot at this term. If not, start phase 2.
+         # Do this for all artificial variables in the basis.
+         # Ref: "An Introduction to Linear Programming and Game Theory"
+         # by Paul R. Thie, Gerard E. Keough, 3rd Ed,
+         # Chapter 3.7 Redundant Systems (page 102)
+         for pivrow in [row for row in range(basis.size)
+                        if basis[row] > T.shape[1] - 2]:
+             non_zero_row = [col for col in range(T.shape[1] - 1)
+                             if abs(T[pivrow, col]) > tol]
+             if len(non_zero_row) > 0:
+                 pivcol = non_zero_row[0]
+                 _apply_pivot(T, basis, pivrow, pivcol, tol)
+                 nit += 1
+
+     if len(basis[:m]) == 0:
+         solution = np.empty(T.shape[1] - 1, dtype=np.float64)
+     else:
+         solution = np.empty(max(T.shape[1] - 1, max(basis[:m]) + 1),
+                             dtype=np.float64)
+
+     while not complete:
+         # Find the pivot column
+         pivcol_found, pivcol = _pivot_col(T, tol, bland)
+         if not pivcol_found:
+             pivcol = np.nan
+             pivrow = np.nan
+             status = 0
+             complete = True
+         else:
+             # Find the pivot row
+             pivrow_found, pivrow = _pivot_row(T, basis, pivcol, phase, tol,
+                                               bland)
+             if not pivrow_found:
+                 status = 3
+                 complete = True
+
+         if callback is not None:
+             solution[:] = 0
+             solution[basis[:n]] = T[:n, -1]
+             x = solution[:m]
+             x, fun, slack, con = _postsolve(
+                 x, postsolve_args
+             )
+             res = OptimizeResult({
+                 'x': x,
+                 'fun': fun,
+                 'slack': slack,
+                 'con': con,
+                 'status': status,
+                 'message': message,
+                 'nit': nit,
+                 'success': status == 0 and complete,
+                 'phase': phase,
+                 'complete': complete,
+             })
+             callback(res)
+
+         if not complete:
+             if nit >= maxiter:
+                 # Iteration limit exceeded
+                 status = 1
+                 complete = True
+             else:
+                 _apply_pivot(T, basis, pivrow, pivcol, tol)
+                 nit += 1
+     return nit, status
+
+
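Because ``postsolve_args`` is only touched when a callback is supplied, the core loop can be exercised directly on a hand-built Phase 2 tableau. A sketch, not recommended library usage; the problem data are invented, and slack columns are assumed to form the initial basis:

    import numpy as np

    # minimize -x1 - 2*x2  s.t.  x1 + x2 <= 4,  x1 + 3*x2 <= 6, after slacks
    A = np.array([[1., 1., 1., 0.],
                  [1., 3., 0., 1.]])
    b = np.array([4., 6.])
    c = np.array([-1., -2., 0., 0.])
    T = np.vstack((np.hstack((A, b[:, None])), np.hstack((c, [0.]))))
    basis = np.array([2, 3])          # slack columns are the initial basis
    nit, status = _solve_simplex(T, A.shape[0], basis,
                                 callback=None, postsolve_args=None)
    x = np.zeros(4)
    x[basis] = T[:2, -1]              # x1 = 3.0, x2 = 1.0, objective -5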
+ def _linprog_simplex(c, c0, A, b, callback, postsolve_args,
+                      maxiter=1000, tol=1e-9, disp=False, bland=False,
+                      **unknown_options):
+     """
+     Minimize a linear objective function subject to linear equality and
+     non-negativity constraints using the two-phase simplex method.
+     Linear programming is intended to solve problems of the following form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     User-facing documentation is in _linprog_doc.py.
+
+     Parameters
+     ----------
+     c : 1-D array
+         Coefficients of the linear objective function to be minimized.
+     c0 : float
+         Constant term in objective function due to fixed (and eliminated)
+         variables. (Purely for display.)
+     A : 2-D array
+         2-D array such that ``A @ x`` gives the values of the equality
+         constraints at ``x``.
+     b : 1-D array
+         1-D array of values representing the right hand side of each equality
+         constraint (row) in ``A``.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function.
+             success : bool
+                 True when an algorithm has completed successfully.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the algorithm being executed.
+             status : int
+                 An integer representing the status of the optimization::
+
+                      0 : Algorithm proceeding nominally
+                      1 : Iteration limit reached
+                      2 : Problem appears to be infeasible
+                      3 : Problem appears to be unbounded
+                      4 : Serious numerical difficulties encountered
+
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform.
+     disp : bool
+         If True, print exit status message to sys.stdout.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     bland : bool
+         If True, use Bland's anti-cycling rule [3]_ to choose pivots to
+         prevent cycling. If False, choose pivots which should lead to a
+         converged solution more quickly. The latter method is subject to
+         cycling (non-convergence) in rare instances.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         `unknown_options` is non-empty a warning is issued listing all
+         unused options.
+
+     Returns
+     -------
+     x : 1-D array
+         Solution vector.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Serious numerical difficulties encountered
+
+     message : str
+         A string descriptor of the exit status of the optimization.
+     iteration : int
+         The number of iterations taken to solve the problem.
+
+     References
+     ----------
+     .. [1] Dantzig, George B., Linear programming and extensions. Rand
+            Corporation Research Study Princeton Univ. Press, Princeton, NJ,
+            1963
+     .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+            Mathematical Programming", McGraw-Hill, Chapter 4.
+     .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
+            Mathematics of Operations Research (2), 1977: pp. 103-107.
+
+     Notes
+     -----
+     The expected problem formulation differs between the top level ``linprog``
+     module and the method-specific solvers. The method-specific solvers expect
+     a problem in standard form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     Whereas the top level ``linprog`` module expects a problem of the form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
+
+     The original problem contains equality, upper-bound and variable
+     constraints whereas the method-specific solver requires equality
+     constraints and variable non-negativity.
+
+     The ``linprog`` module converts the original problem to standard form by
+     converting the simple bounds to upper-bound constraints, introducing
+     non-negative slack variables for inequality constraints, and expressing
+     unbounded variables as the difference between two non-negative variables.
+     """
+     _check_unknown_options(unknown_options)
+
+     status = 0
+     messages = {0: "Optimization terminated successfully.",
+                 1: "Iteration limit reached.",
+                 2: "Optimization failed. Unable to find a feasible"
+                    " starting point.",
+                 3: "Optimization failed. The problem appears to be unbounded.",
+                 4: "Optimization failed. Singular matrix encountered."}
+
+     n, m = A.shape
+
+     # All constraints must have b >= 0.
+     is_negative_constraint = np.less(b, 0)
+     A[is_negative_constraint] *= -1
+     b[is_negative_constraint] *= -1
+
+     # As all constraints are equality constraints the artificial variables
+     # will also be basic variables.
+     av = np.arange(n) + m
+     basis = av.copy()
+
+     # Format the phase one tableau by adding artificial variables and stacking
+     # the constraints, the objective row and pseudo-objective row.
+     row_constraints = np.hstack((A, np.eye(n), b[:, np.newaxis]))
+     row_objective = np.hstack((c, np.zeros(n), c0))
+     row_pseudo_objective = -row_constraints.sum(axis=0)
+     row_pseudo_objective[av] = 0
+     T = np.vstack((row_constraints, row_objective, row_pseudo_objective))
+
+     nit1, status = _solve_simplex(T, n, basis, callback=callback,
+                                   postsolve_args=postsolve_args,
+                                   maxiter=maxiter, tol=tol, phase=1,
+                                   bland=bland
+                                   )
+     # if pseudo objective is zero, remove the last row from the tableau and
+     # proceed to phase 2
+     nit2 = nit1
+     if abs(T[-1, -1]) < tol:
+         # Remove the pseudo-objective row from the tableau
+         T = T[:-1, :]
+         # Remove the artificial variable columns from the tableau
+         T = np.delete(T, av, 1)
+     else:
+         # Failure to find a feasible starting point
+         status = 2
+         messages[status] = (
+             "Phase 1 of the simplex method failed to find a feasible "
+             "solution. The pseudo-objective function evaluates to {0:.1e} "
+             "which exceeds the required tolerance of {1} for a solution to be "
+             "considered 'close enough' to zero to be a basic solution. "
+             "Consider increasing the tolerance to be greater than {0:.1e}. "
+             "If this tolerance is unacceptably large the problem may be "
+             "infeasible.".format(abs(T[-1, -1]), tol)
+         )
+
+     if status == 0:
+         # Phase 2
+         nit2, status = _solve_simplex(T, n, basis, callback=callback,
+                                       postsolve_args=postsolve_args,
+                                       maxiter=maxiter, tol=tol, phase=2,
+                                       bland=bland, nit0=nit1
+                                       )
+
+     solution = np.zeros(n + m)
+     solution[basis[:n]] = T[:n, -1]
+     x = solution[:m]
+
+     return x, status, messages[status], int(nit2)
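The corresponding public entry point, sketched with invented data (``method='simplex'`` was deprecated in SciPy 1.9 and removed in 1.11, so this assumes an older release):

    from scipy.optimize import linprog

    res = linprog(c=[-1, -2], A_ub=[[1, 1], [1, 3]], b_ub=[4, 6],
                  method='simplex', options={'bland': True})
    print(res.x, res.fun)   # expected: [3. 1.] -5.0
    # 'bland': True selects the anti-cycling pivot rule described above.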