krish-emissary committed
Commit 0d098bc · verified · 1 parent: ad58bcd

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc +0 -0
  2. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc +0 -0
  3. emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc +0 -0
  4. emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json +0 -0
  5. emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json +30 -0
  6. emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json +0 -0
  7. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py +130 -0
  8. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py +608 -0
  9. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py +459 -0
  10. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth +1 -0
  11. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py +18 -0
  12. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py +342 -0
  13. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py +348 -0
  14. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py +7 -0
  15. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py +219 -0
  16. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py +304 -0
  17. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv +0 -0
  18. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth +1 -0
  19. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py +161 -0
  20. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py +141 -0
  21. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py +18 -0
  22. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py +413 -0
  23. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd +1 -0
  24. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd +1 -0
  25. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README +76 -0
  26. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py +452 -0
  27. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py +753 -0
  28. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py +666 -0
  29. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py +549 -0
  30. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py +316 -0
  31. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py +62 -0
  32. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py +590 -0
  33. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py +728 -0
  34. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py +693 -0
  35. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py +1951 -0
  36. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py +856 -0
  37. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so +0 -0
  38. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py +278 -0
  39. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py +732 -0
  40. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so +0 -0
  41. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py +475 -0
  42. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py +158 -0
  43. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py +543 -0
  44. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py +896 -0
  45. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py +716 -0
  46. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py +1434 -0
  47. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py +440 -0
  48. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py +1126 -0
  49. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py +572 -0
  50. emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py +661 -0
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs.cpython-310.pyc ADDED
Binary file (9.62 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/classification_graphs_binary.cpython-310.pyc ADDED
Binary file (5.6 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/__pycache__/get_max_tokens.cpython-310.pyc ADDED
Binary file (2.48 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/checkpoints/tokenizer.json ADDED
The diff for this file is too large to render. See the raw diff.
 
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
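
For context, a minimal sketch of how a checkpoint directory containing this special_tokens_map.json is typically consumed, assuming the Hugging Face transformers package; the local path below is illustrative and stands in for the outputs/ directory added in this commit.

    from transformers import AutoTokenizer

    # Load the tokenizer artifacts committed above (path is illustrative).
    tok = AutoTokenizer.from_pretrained("emissary-ml/llm-scripts/fine-tuning/llama3/outputs")

    # Per the map above: bos=<s>, eos=</s>, pad reuses </s>, unk=<unk>.
    print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)

Reusing the eos token as pad_token is a common choice when fine-tuning Llama-style models, whose base tokenizers ship without a dedicated padding token.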
emissary-ml/llm-scripts/fine-tuning/llama3/outputs/tokenizer.json ADDED
The diff for this file is too large to render. See the raw diff.
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/_virtualenv.py ADDED
@@ -0,0 +1,130 @@
+ """Patches that are applied at runtime to the virtual environment"""
+ # -*- coding: utf-8 -*-
+
+ import os
+ import sys
+
+ VIRTUALENV_PATCH_FILE = os.path.join(__file__)
+
+
+ def patch_dist(dist):
+     """
+     Distutils allows user to configure some arguments via a configuration file:
+     https://docs.python.org/3/install/index.html#distutils-configuration-files
+
+     Some of this arguments though don't make sense in context of the virtual environment files, let's fix them up.
+     """
+     # we cannot allow some install config as that would get packages installed outside of the virtual environment
+     old_parse_config_files = dist.Distribution.parse_config_files
+
+     def parse_config_files(self, *args, **kwargs):
+         result = old_parse_config_files(self, *args, **kwargs)
+         install = self.get_option_dict("install")
+
+         if "prefix" in install:  # the prefix governs where to install the libraries
+             install["prefix"] = VIRTUALENV_PATCH_FILE, os.path.abspath(sys.prefix)
+         for base in ("purelib", "platlib", "headers", "scripts", "data"):
+             key = "install_{}".format(base)
+             if key in install:  # do not allow global configs to hijack venv paths
+                 install.pop(key, None)
+         return result
+
+     dist.Distribution.parse_config_files = parse_config_files
+
+
+ # Import hook that patches some modules to ignore configuration values that break package installation in case
+ # of virtual environments.
+ _DISTUTILS_PATCH = "distutils.dist", "setuptools.dist"
+ if sys.version_info > (3, 4):
+     # https://docs.python.org/3/library/importlib.html#setting-up-an-importer
+     from functools import partial
+     from importlib.abc import MetaPathFinder
+     from importlib.util import find_spec
+
+     class _Finder(MetaPathFinder):
+         """A meta path finder that allows patching the imported distutils modules"""
+
+         fullname = None
+
+         # lock[0] is threading.Lock(), but initialized lazily to avoid importing threading very early at startup,
+         # because there are gevent-based applications that need to be first to import threading by themselves.
+         # See https://github.com/pypa/virtualenv/issues/1895 for details.
+         lock = []
+
+         def find_spec(self, fullname, path, target=None):
+             if fullname in _DISTUTILS_PATCH and self.fullname is None:
+                 # initialize lock[0] lazily
+                 if len(self.lock) == 0:
+                     import threading
+
+                     lock = threading.Lock()
+                     # there is possibility that two threads T1 and T2 are simultaneously running into find_spec,
+                     # observing .lock as empty, and further going into hereby initialization. However due to the GIL,
+                     # list.append() operation is atomic and this way only one of the threads will "win" to put the lock
+                     # - that every thread will use - into .lock[0].
+                     # https://docs.python.org/3/faq/library.html#what-kinds-of-global-value-mutation-are-thread-safe
+                     self.lock.append(lock)
+
+                 with self.lock[0]:
+                     self.fullname = fullname
+                     try:
+                         spec = find_spec(fullname, path)
+                         if spec is not None:
+                             # https://www.python.org/dev/peps/pep-0451/#how-loading-will-work
+                             is_new_api = hasattr(spec.loader, "exec_module")
+                             func_name = "exec_module" if is_new_api else "load_module"
+                             old = getattr(spec.loader, func_name)
+                             func = self.exec_module if is_new_api else self.load_module
+                             if old is not func:
+                                 try:
+                                     setattr(spec.loader, func_name, partial(func, old))
+                                 except AttributeError:
+                                     pass  # C-Extension loaders are r/o such as zipimporter with <python 3.7
+                         return spec
+                     finally:
+                         self.fullname = None
+
+         @staticmethod
+         def exec_module(old, module):
+             old(module)
+             if module.__name__ in _DISTUTILS_PATCH:
+                 patch_dist(module)
+
+         @staticmethod
+         def load_module(old, name):
+             module = old(name)
+             if module.__name__ in _DISTUTILS_PATCH:
+                 patch_dist(module)
+             return module
+
+     sys.meta_path.insert(0, _Finder())
+ else:
+     # https://www.python.org/dev/peps/pep-0302/
+     from imp import find_module
+     from pkgutil import ImpImporter, ImpLoader
+
+     class _VirtualenvImporter(object, ImpImporter):
+         def __init__(self, path=None):
+             object.__init__(self)
+             ImpImporter.__init__(self, path)
+
+         def find_module(self, fullname, path=None):
+             if fullname in _DISTUTILS_PATCH:
+                 try:
+                     return _VirtualenvLoader(fullname, *find_module(fullname.split(".")[-1], path))
+                 except ImportError:
+                     pass
+             return None
+
+     class _VirtualenvLoader(object, ImpLoader):
+         def __init__(self, fullname, file, filename, etc):
+             object.__init__(self)
+             ImpLoader.__init__(self, fullname, file, filename, etc)
+
+         def load_module(self, fullname):
+             module = super(_VirtualenvLoader, self).load_module(fullname)
+             patch_dist(module)
+             module.__loader__ = None  # distlib fallback
+             return module
+
+     sys.meta_path.append(_VirtualenvImporter())
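
For context, a minimal sketch of the meta-path-finder mechanism _virtualenv.py builds on, assuming only the standard library; ImportLogger is a hypothetical name, not part of the vendored file. A finder inserted at the front of sys.meta_path sees every import first, and returning None from find_spec defers to the remaining finders, which is how _Finder above intercepts distutils.dist and setuptools.dist without loading them itself.

    import sys
    from importlib.abc import MetaPathFinder

    class ImportLogger(MetaPathFinder):
        def find_spec(self, fullname, path, target=None):
            print("importing:", fullname)  # observe every import attempt
            return None  # defer to the remaining finders on sys.meta_path

    sys.meta_path.insert(0, ImportLogger())
    import json  # on first import, prints "importing: json" (plus its dependencies)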
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/appdirs.py ADDED
@@ -0,0 +1,608 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ # Copyright (c) 2005-2010 ActiveState Software Inc.
+ # Copyright (c) 2013 Eddy Petrișor
+
+ """Utilities for determining application-specific dirs.
+
+ See <http://github.com/ActiveState/appdirs> for details and usage.
+ """
+ # Dev Notes:
+ # - MSDN on where to store app data files:
+ #   http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
+ # - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
+ # - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+
+ __version__ = "1.4.4"
+ __version_info__ = tuple(int(segment) for segment in __version__.split("."))
+
+
+ import sys
+ import os
+
+ PY3 = sys.version_info[0] == 3
+
+ if PY3:
+     unicode = str
+
+ if sys.platform.startswith('java'):
+     import platform
+     os_name = platform.java_ver()[3][0]
+     if os_name.startswith('Windows'):  # "Windows XP", "Windows 7", etc.
+         system = 'win32'
+     elif os_name.startswith('Mac'):  # "Mac OS X", etc.
+         system = 'darwin'
+     else:  # "Linux", "SunOS", "FreeBSD", etc.
+         # Setting this to "linux2" is not ideal, but only Windows or Mac
+         # are actually checked for and the rest of the module expects
+         # *sys.platform* style strings.
+         system = 'linux2'
+ else:
+     system = sys.platform
+
+
+
+ def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user data directories are:
+         Mac OS X:               ~/Library/Application Support/<AppName>
+         Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
+         Win XP (not roaming):   C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
+         Win XP (roaming):       C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
+         Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
+         Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
+
+     For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
+     That means, by default "~/.local/share/<AppName>".
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+         path = os.path.normpath(_get_win_folder(const))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+     elif system == 'darwin':
+         path = os.path.expanduser('~/Library/Application Support/')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
+     r"""Return full path to the user-shared data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "multipath" is an optional parameter only applicable to *nix
+             which indicates that the entire list of data dirs should be
+             returned. By default, the first item from XDG_DATA_DIRS is
+             returned, or '/usr/local/share/<AppName>',
+             if XDG_DATA_DIRS is not set
+
+     Typical site data directories are:
+         Mac OS X:   /Library/Application Support/<AppName>
+         Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
+         Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
+         Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+         Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.
+
+     For Unix, this is using the $XDG_DATA_DIRS[0] default.
+
+     WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+     elif system == 'darwin':
+         path = os.path.expanduser('/Library/Application Support')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         # XDG default for $XDG_DATA_DIRS
+         # only first, if multipath is False
+         path = os.getenv('XDG_DATA_DIRS',
+                          os.pathsep.join(['/usr/local/share', '/usr/share']))
+         pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+         if appname:
+             if version:
+                 appname = os.path.join(appname, version)
+             pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+         if multipath:
+             path = os.pathsep.join(pathlist)
+         else:
+             path = pathlist[0]
+         return path
+
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific config dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user config directories are:
+         Mac OS X:   same as user_data_dir
+         Unix:       ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
+         Win *:      same as user_data_dir
+
+     For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
+     That means, by default "~/.config/<AppName>".
+     """
+     if system in ["win32", "darwin"]:
+         path = user_data_dir(appname, appauthor, None, roaming)
+     else:
+         path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
+     r"""Return full path to the user-shared data dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "multipath" is an optional parameter only applicable to *nix
+             which indicates that the entire list of config dirs should be
+             returned. By default, the first item from XDG_CONFIG_DIRS is
+             returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
+
+     Typical site config directories are:
+         Mac OS X:   same as site_data_dir
+         Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
+                     $XDG_CONFIG_DIRS
+         Win *:      same as site_data_dir
+         Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+
+     For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
+
+     WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+     """
+     if system in ["win32", "darwin"]:
+         path = site_data_dir(appname, appauthor)
+         if appname and version:
+             path = os.path.join(path, version)
+     else:
+         # XDG default for $XDG_CONFIG_DIRS
+         # only first, if multipath is False
+         path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
+         pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+         if appname:
+             if version:
+                 appname = os.path.join(appname, version)
+             pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+         if multipath:
+             path = os.pathsep.join(pathlist)
+         else:
+             path = pathlist[0]
+     return path
+
+
+ def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
+     r"""Return full path to the user-specific cache dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "opinion" (boolean) can be False to disable the appending of
+             "Cache" to the base app data dir for Windows. See
+             discussion below.
+
+     Typical user cache directories are:
+         Mac OS X:   ~/Library/Caches/<AppName>
+         Unix:       ~/.cache/<AppName> (XDG default)
+         Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
+         Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
+
+     On Windows the only suggestion in the MSDN docs is that local settings go in
+     the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
+     app data dir (the default returned by `user_data_dir` above). Apps typically
+     put cache data somewhere *under* the given dir here. Some examples:
+         ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+         ...\Acme\SuperApp\Cache\1.0
+     OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+     This can be disabled with the `opinion=False` option.
+     """
+     if system == "win32":
+         if appauthor is None:
+             appauthor = appname
+         path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
+         if appname:
+             if appauthor is not False:
+                 path = os.path.join(path, appauthor, appname)
+             else:
+                 path = os.path.join(path, appname)
+             if opinion:
+                 path = os.path.join(path, "Cache")
+     elif system == 'darwin':
+         path = os.path.expanduser('~/Library/Caches')
+         if appname:
+             path = os.path.join(path, appname)
+     else:
+         path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
+     r"""Return full path to the user-specific state dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "roaming" (boolean, default False) can be set True to use the Windows
+             roaming appdata directory. That means that for users on a Windows
+             network setup for roaming profiles, this user data will be
+             sync'd on login. See
+             <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+             for a discussion of issues.
+
+     Typical user state directories are:
+         Mac OS X:   same as user_data_dir
+         Unix:       ~/.local/state/<AppName>    # or in $XDG_STATE_HOME, if defined
+         Win *:      same as user_data_dir
+
+     For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
+     to extend the XDG spec and support $XDG_STATE_HOME.
+
+     That means, by default "~/.local/state/<AppName>".
+     """
+     if system in ["win32", "darwin"]:
+         path = user_data_dir(appname, appauthor, None, roaming)
+     else:
+         path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
+         if appname:
+             path = os.path.join(path, appname)
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
+     r"""Return full path to the user-specific log dir for this application.
+
+         "appname" is the name of application.
+             If None, just the system directory is returned.
+         "appauthor" (only used on Windows) is the name of the
+             appauthor or distributing body for this application. Typically
+             it is the owning company name. This falls back to appname. You may
+             pass False to disable it.
+         "version" is an optional version path element to append to the
+             path. You might want to use this if you want multiple versions
+             of your app to be able to run independently. If used, this
+             would typically be "<major>.<minor>".
+             Only applied when appname is present.
+         "opinion" (boolean) can be False to disable the appending of
+             "Logs" to the base app data dir for Windows, and "log" to the
+             base cache dir for Unix. See discussion below.
+
+     Typical user log directories are:
+         Mac OS X:   ~/Library/Logs/<AppName>
+         Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
+         Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
+         Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
+
+     On Windows the only suggestion in the MSDN docs is that local settings
+     go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
+     examples of what some windows apps use for a logs dir.)
+
+     OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
+     value for Windows and appends "log" to the user cache dir for Unix.
+     This can be disabled with the `opinion=False` option.
+     """
+     if system == "darwin":
+         path = os.path.join(
+             os.path.expanduser('~/Library/Logs'),
+             appname)
+     elif system == "win32":
+         path = user_data_dir(appname, appauthor, version)
+         version = False
+         if opinion:
+             path = os.path.join(path, "Logs")
+     else:
+         path = user_cache_dir(appname, appauthor, version)
+         version = False
+         if opinion:
+             path = os.path.join(path, "log")
+     if appname and version:
+         path = os.path.join(path, version)
+     return path
+
+
+ class AppDirs(object):
+     """Convenience wrapper for getting application dirs."""
+     def __init__(self, appname=None, appauthor=None, version=None,
+                  roaming=False, multipath=False):
+         self.appname = appname
+         self.appauthor = appauthor
+         self.version = version
+         self.roaming = roaming
+         self.multipath = multipath
+
+     @property
+     def user_data_dir(self):
+         return user_data_dir(self.appname, self.appauthor,
+                              version=self.version, roaming=self.roaming)
+
+     @property
+     def site_data_dir(self):
+         return site_data_dir(self.appname, self.appauthor,
+                              version=self.version, multipath=self.multipath)
+
+     @property
+     def user_config_dir(self):
+         return user_config_dir(self.appname, self.appauthor,
+                                version=self.version, roaming=self.roaming)
+
+     @property
+     def site_config_dir(self):
+         return site_config_dir(self.appname, self.appauthor,
+                                version=self.version, multipath=self.multipath)
+
+     @property
+     def user_cache_dir(self):
+         return user_cache_dir(self.appname, self.appauthor,
+                               version=self.version)
+
+     @property
+     def user_state_dir(self):
+         return user_state_dir(self.appname, self.appauthor,
+                               version=self.version)
+
+     @property
+     def user_log_dir(self):
+         return user_log_dir(self.appname, self.appauthor,
+                             version=self.version)
+
+
+ #---- internal support stuff
+
+ def _get_win_folder_from_registry(csidl_name):
+     """This is a fallback technique at best. I'm not sure if using the
+     registry for this guarantees us the correct answer for all CSIDL_*
+     names.
+     """
+     if PY3:
+         import winreg as _winreg
+     else:
+         import _winreg
+
+     shell_folder_name = {
+         "CSIDL_APPDATA": "AppData",
+         "CSIDL_COMMON_APPDATA": "Common AppData",
+         "CSIDL_LOCAL_APPDATA": "Local AppData",
+     }[csidl_name]
+
+     key = _winreg.OpenKey(
+         _winreg.HKEY_CURRENT_USER,
+         r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+     )
+     dir, type = _winreg.QueryValueEx(key, shell_folder_name)
+     return dir
+
+
+ def _get_win_folder_with_pywin32(csidl_name):
+     from win32com.shell import shellcon, shell
+     dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
+     # Try to make this a unicode path because SHGetFolderPath does
+     # not return unicode strings when there is unicode data in the
+     # path.
+     try:
+         dir = unicode(dir)
+
+         # Downgrade to short path name if have highbit chars. See
+         # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+         has_high_char = False
+         for c in dir:
+             if ord(c) > 255:
+                 has_high_char = True
+                 break
+         if has_high_char:
+             try:
+                 import win32api
+                 dir = win32api.GetShortPathName(dir)
+             except ImportError:
+                 pass
+     except UnicodeError:
+         pass
+     return dir
+
+
+ def _get_win_folder_with_ctypes(csidl_name):
+     import ctypes
+
+     csidl_const = {
+         "CSIDL_APPDATA": 26,
+         "CSIDL_COMMON_APPDATA": 35,
+         "CSIDL_LOCAL_APPDATA": 28,
+     }[csidl_name]
+
+     buf = ctypes.create_unicode_buffer(1024)
+     ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+
+     # Downgrade to short path name if have highbit chars. See
+     # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+     has_high_char = False
+     for c in buf:
+         if ord(c) > 255:
+             has_high_char = True
+             break
+     if has_high_char:
+         buf2 = ctypes.create_unicode_buffer(1024)
+         if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+             buf = buf2
+
+     return buf.value
+
+ def _get_win_folder_with_jna(csidl_name):
+     import array
+     from com.sun import jna
+     from com.sun.jna.platform import win32
+
+     buf_size = win32.WinDef.MAX_PATH * 2
+     buf = array.zeros('c', buf_size)
+     shell = win32.Shell32.INSTANCE
+     shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
+     dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+     # Downgrade to short path name if have highbit chars. See
+     # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+     has_high_char = False
+     for c in dir:
+         if ord(c) > 255:
+             has_high_char = True
+             break
+     if has_high_char:
+         buf = array.zeros('c', buf_size)
+         kernel = win32.Kernel32.INSTANCE
+         if kernel.GetShortPathName(dir, buf, buf_size):
+             dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+     return dir
+
+ if system == "win32":
+     try:
+         import win32com.shell
+         _get_win_folder = _get_win_folder_with_pywin32
+     except ImportError:
+         try:
+             from ctypes import windll
+             _get_win_folder = _get_win_folder_with_ctypes
+         except ImportError:
+             try:
+                 import com.sun.jna
+                 _get_win_folder = _get_win_folder_with_jna
+             except ImportError:
+                 _get_win_folder = _get_win_folder_from_registry
+
+
+ #---- self test code
+
+ if __name__ == "__main__":
+     appname = "MyApp"
+     appauthor = "MyCompany"
+
+     props = ("user_data_dir",
+              "user_config_dir",
+              "user_cache_dir",
+              "user_state_dir",
+              "user_log_dir",
+              "site_data_dir",
+              "site_config_dir")
+
+     print("-- app dirs %s --" % __version__)
+
+     print("-- app dirs (with optional 'version')")
+     dirs = AppDirs(appname, appauthor, version="1.0")
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (without optional 'version')")
+     dirs = AppDirs(appname, appauthor)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (without optional 'appauthor')")
+     dirs = AppDirs(appname)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
+
+     print("\n-- app dirs (with disabled 'appauthor')")
+     dirs = AppDirs(appname, appauthor=False)
+     for prop in props:
+         print("%s: %s" % (prop, getattr(dirs, prop)))
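
For context, a minimal sketch of the XDG lookup implemented in the Unix branches above, assuming the module is importable as appdirs; the /tmp path is illustrative. user_data_dir reads $XDG_DATA_HOME at call time, so the override takes effect even when set after import:

    import os
    import appdirs

    os.environ["XDG_DATA_HOME"] = "/tmp/xdg-demo"  # illustrative override
    print(appdirs.user_data_dir("MyApp"))  # -> /tmp/xdg-demo/MyApp on Linux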
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/decorator.py ADDED
@@ -0,0 +1,459 @@
+ # #########################     LICENSE     ############################ #
+
+ # Copyright (c) 2005-2025, Michele Simionato
+ # All rights reserved.
+
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are
+ # met:
+
+ #   Redistributions of source code must retain the above copyright
+ #   notice, this list of conditions and the following disclaimer.
+ #   Redistributions in bytecode form must reproduce the above copyright
+ #   notice, this list of conditions and the following disclaimer in
+ #   the documentation and/or other materials provided with the
+ #   distribution.
+
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ # HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ # DAMAGE.
+
+ """
+ Decorator module, see
+ https://github.com/micheles/decorator/blob/master/docs/documentation.md
+ for the documentation.
+ """
+ import re
+ import sys
+ import inspect
+ import operator
+ import itertools
+ import functools
+ from contextlib import _GeneratorContextManager
+ from inspect import getfullargspec, iscoroutinefunction, isgeneratorfunction
+
+ __version__ = '5.2.1'
+
+ DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(')
+ POS = inspect.Parameter.POSITIONAL_OR_KEYWORD
+ EMPTY = inspect.Parameter.empty
+
+
+ # this is not used anymore in the core, but kept for backward compatibility
+ class FunctionMaker(object):
+     """
+     An object with the ability to create functions with a given signature.
+     It has attributes name, doc, module, signature, defaults, dict and
+     methods update and make.
+     """
+
+     # Atomic get-and-increment provided by the GIL
+     _compile_count = itertools.count()
+
+     # make pylint happy
+     args = varargs = varkw = defaults = kwonlyargs = kwonlydefaults = ()
+
+     def __init__(self, func=None, name=None, signature=None,
+                  defaults=None, doc=None, module=None, funcdict=None):
+         self.shortsignature = signature
+         if func:
+             # func can be a class or a callable, but not an instance method
+             self.name = func.__name__
+             if self.name == '<lambda>':  # small hack for lambda functions
+                 self.name = '_lambda_'
+             self.doc = func.__doc__
+             self.module = func.__module__
+             if inspect.isroutine(func) or isinstance(func, functools.partial):
+                 argspec = getfullargspec(func)
+                 self.annotations = getattr(func, '__annotations__', {})
+                 for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs',
+                           'kwonlydefaults'):
+                     setattr(self, a, getattr(argspec, a))
+                 for i, arg in enumerate(self.args):
+                     setattr(self, 'arg%d' % i, arg)
+                 allargs = list(self.args)
+                 allshortargs = list(self.args)
+                 if self.varargs:
+                     allargs.append('*' + self.varargs)
+                     allshortargs.append('*' + self.varargs)
+                 elif self.kwonlyargs:
+                     allargs.append('*')  # single star syntax
+                 for a in self.kwonlyargs:
+                     allargs.append('%s=None' % a)
+                     allshortargs.append('%s=%s' % (a, a))
+                 if self.varkw:
+                     allargs.append('**' + self.varkw)
+                     allshortargs.append('**' + self.varkw)
+                 self.signature = ', '.join(allargs)
+                 self.shortsignature = ', '.join(allshortargs)
+                 self.dict = func.__dict__.copy()
+         # func=None happens when decorating a caller
+         if name:
+             self.name = name
+         if signature is not None:
+             self.signature = signature
+         if defaults:
+             self.defaults = defaults
+         if doc:
+             self.doc = doc
+         if module:
+             self.module = module
+         if funcdict:
+             self.dict = funcdict
+         # check existence required attributes
+         assert hasattr(self, 'name')
+         if not hasattr(self, 'signature'):
+             raise TypeError('You are decorating a non function: %s' % func)
+
+     def update(self, func, **kw):
+         """
+         Update the signature of func with the data in self
+         """
+         func.__name__ = self.name
+         func.__doc__ = getattr(self, 'doc', None)
+         func.__dict__ = getattr(self, 'dict', {})
+         func.__defaults__ = self.defaults
+         func.__kwdefaults__ = self.kwonlydefaults or None
+         func.__annotations__ = getattr(self, 'annotations', None)
+         try:
+             frame = sys._getframe(3)
+         except AttributeError:  # for IronPython and similar implementations
+             callermodule = '?'
+         else:
+             callermodule = frame.f_globals.get('__name__', '?')
+         func.__module__ = getattr(self, 'module', callermodule)
+         func.__dict__.update(kw)
+
+     def make(self, src_templ, evaldict=None, addsource=False, **attrs):
+         """
+         Make a new function from a given template and update the signature
+         """
+         src = src_templ % vars(self)  # expand name and signature
+         evaldict = evaldict or {}
+         mo = DEF.search(src)
+         if mo is None:
+             raise SyntaxError('not a valid function template\n%s' % src)
+         name = mo.group(1)  # extract the function name
+         names = set([name] + [arg.strip(' *') for arg in
+                               self.shortsignature.split(',')])
+         for n in names:
+             if n in ('_func_', '_call_'):
+                 raise NameError('%s is overridden in\n%s' % (n, src))
+
+         if not src.endswith('\n'):  # add a newline for old Pythons
+             src += '\n'
+
+         # Ensure each generated function has a unique filename for profilers
+         # (such as cProfile) that depend on the tuple of (<filename>,
+         # <definition line>, <function name>) being unique.
+         filename = '<decorator-gen-%d>' % next(self._compile_count)
+         try:
+             code = compile(src, filename, 'single')
+             exec(code, evaldict)
+         except Exception:
+             print('Error in generated code:', file=sys.stderr)
+             print(src, file=sys.stderr)
+             raise
+         func = evaldict[name]
+         if addsource:
+             attrs['__source__'] = src
+         self.update(func, **attrs)
+         return func
+
+     @classmethod
+     def create(cls, obj, body, evaldict, defaults=None,
+                doc=None, module=None, addsource=True, **attrs):
+         """
+         Create a function from the strings name, signature and body.
+         evaldict is the evaluation dictionary. If addsource is true an
+         attribute __source__ is added to the result. The attributes attrs
+         are added, if any.
+         """
+         if isinstance(obj, str):  # "name(signature)"
+             name, rest = obj.strip().split('(', 1)
+             signature = rest[:-1]  # strip a right parens
+             func = None
+         else:  # a function
+             name = None
+             signature = None
+             func = obj
+         self = cls(func, name, signature, defaults, doc, module)
+         ibody = '\n'.join('    ' + line for line in body.splitlines())
+         caller = evaldict.get('_call_')  # when called from `decorate`
+         if caller and iscoroutinefunction(caller):
+             body = ('async def %(name)s(%(signature)s):\n' + ibody).replace(
+                 'return', 'return await')
+         else:
+             body = 'def %(name)s(%(signature)s):\n' + ibody
+         return self.make(body, evaldict, addsource, **attrs)
+
+
+ def fix(args, kwargs, sig):
+     """
+     Fix args and kwargs to be consistent with the signature
+     """
+     ba = sig.bind(*args, **kwargs)
+     ba.apply_defaults()  # needed for test_dan_schult
+     return ba.args, ba.kwargs
+
+
+ def decorate(func, caller, extras=(), kwsyntax=False):
+     """
+     Decorates a function/generator/coroutine using a caller.
+     If kwsyntax is True calling the decorated functions with keyword
+     syntax will pass the named arguments inside the ``kw`` dictionary,
+     even if such argument are positional, similarly to what functools.wraps
+     does. By default kwsyntax is False and the the arguments are untouched.
+     """
+     sig = inspect.signature(func)
+     if isinstance(func, functools.partial):
+         func = functools.update_wrapper(func, func.func)
+     if iscoroutinefunction(caller):
+         async def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             return await caller(func, *(extras + args), **kw)
+     elif isgeneratorfunction(caller):
+         def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             for res in caller(func, *(extras + args), **kw):
+                 yield res
+     else:
+         def fun(*args, **kw):
+             if not kwsyntax:
+                 args, kw = fix(args, kw, sig)
+             return caller(func, *(extras + args), **kw)
+
+     fun.__name__ = func.__name__
+     fun.__doc__ = func.__doc__
+     fun.__wrapped__ = func
+     fun.__signature__ = sig
+     fun.__qualname__ = func.__qualname__
+     # builtin functions like defaultdict.__setitem__ lack many attributes
+     try:
+         fun.__defaults__ = func.__defaults__
+     except AttributeError:
+         pass
+     try:
+         fun.__kwdefaults__ = func.__kwdefaults__
+     except AttributeError:
+         pass
+     try:
+         fun.__annotations__ = func.__annotations__
+     except AttributeError:
+         pass
+     try:
+         fun.__module__ = func.__module__
+     except AttributeError:
+         pass
+     try:
+         fun.__name__ = func.__name__
+     except AttributeError:  # happens with old versions of numpy.vectorize
+         func.__name__ == 'noname'
+     try:
+         fun.__dict__.update(func.__dict__)
+     except AttributeError:
+         pass
+     return fun
+
+
+ def decoratorx(caller):
+     """
+     A version of "decorator" implemented via "exec" and not via the
+     Signature object. Use this if you are want to preserve the `.__code__`
+     object properties (https://github.com/micheles/decorator/issues/129).
+     """
+     def dec(func):
+         return FunctionMaker.create(
+             func,
+             "return _call_(_func_, %(shortsignature)s)",
+             dict(_call_=caller, _func_=func),
+             __wrapped__=func, __qualname__=func.__qualname__)
+     return dec
+
+
+ def decorator(caller, _func=None, kwsyntax=False):
+     """
+     decorator(caller) converts a caller function into a decorator
+     """
+     if _func is not None:  # return a decorated function
+         # this is obsolete behavior; you should use decorate instead
+         return decorate(_func, caller, (), kwsyntax)
+     # else return a decorator function
+     sig = inspect.signature(caller)
+     dec_params = [p for p in sig.parameters.values() if p.kind is POS]
+
+     def dec(func=None, *args, **kw):
+         na = len(args) + 1
+         extras = args + tuple(kw.get(p.name, p.default)
+                               for p in dec_params[na:]
+                               if p.default is not EMPTY)
+         if func is None:
+             return lambda func: decorate(func, caller, extras, kwsyntax)
+         else:
+             return decorate(func, caller, extras, kwsyntax)
+     dec.__signature__ = sig.replace(parameters=dec_params)
+     dec.__name__ = caller.__name__
+     dec.__doc__ = caller.__doc__
+     dec.__wrapped__ = caller
+     dec.__qualname__ = caller.__qualname__
+     dec.__kwdefaults__ = getattr(caller, '__kwdefaults__', None)
+     dec.__dict__.update(caller.__dict__)
+     return dec
+
+
+ # ####################### contextmanager ####################### #
+
+
+ class ContextManager(_GeneratorContextManager):
+     def __init__(self, g, *a, **k):
+         _GeneratorContextManager.__init__(self, g, a, k)
+
+     def __call__(self, func):
+         def caller(f, *a, **k):
+             with self.__class__(self.func, *self.args, **self.kwds):
+                 return f(*a, **k)
+         return decorate(func, caller)
+
+
+ _contextmanager = decorator(ContextManager)
+
+
+ def contextmanager(func):
+     # Enable Pylint config: contextmanager-decorators=decorator.contextmanager
+     return _contextmanager(func)
+
+
+ # ############################ dispatch_on ############################ #
+
+ def append(a, vancestors):
+     """
+     Append ``a`` to the list of the virtual ancestors, unless it is already
+     included.
+     """
+     add = True
+     for j, va in enumerate(vancestors):
+         if issubclass(va, a):
+             add = False
+             break
+         if issubclass(a, va):
+             vancestors[j] = a
+             add = False
+     if add:
+         vancestors.append(a)
+
+
+ # inspired from simplegeneric by P.J. Eby and functools.singledispatch
+ def dispatch_on(*dispatch_args):
+     """
+     Factory of decorators turning a function into a generic function
+     dispatching on the given arguments.
+     """
+     assert dispatch_args, 'No dispatch args passed'
+     dispatch_str = '(%s,)' % ', '.join(dispatch_args)
+
+     def check(arguments, wrong=operator.ne, msg=''):
+         """Make sure one passes the expected number of arguments"""
+         if wrong(len(arguments), len(dispatch_args)):
+             raise TypeError('Expected %d arguments, got %d%s' %
+                             (len(dispatch_args), len(arguments), msg))
+
+     def gen_func_dec(func):
+         """Decorator turning a function into a generic function"""
+
+         # first check the dispatch arguments
+         argset = set(getfullargspec(func).args)
+         if not set(dispatch_args) <= argset:
+             raise NameError('Unknown dispatch arguments %s' % dispatch_str)
+
+         typemap = {}
+
+         def vancestors(*types):
+             """
+             Get a list of sets of virtual ancestors for the given types
+             """
+             check(types)
+             ras = [[] for _ in range(len(dispatch_args))]
+             for types_ in typemap:
+                 for t, type_, ra in zip(types, types_, ras):
+                     if issubclass(t, type_) and type_ not in t.mro():
+                         append(type_, ra)
+             return [set(ra) for ra in ras]
+
+         def ancestors(*types):
+             """
+             Get a list of virtual MROs, one for each type
+             """
+             check(types)
+             lists = []
+             for t, vas in zip(types, vancestors(*types)):
+                 n_vas = len(vas)
+                 if n_vas > 1:
+                     raise RuntimeError(
+                         'Ambiguous dispatch for %s: %s' % (t, vas))
+                 elif n_vas == 1:
+                     va, = vas
+                     mro = type('t', (t, va), {}).mro()[1:]
+                 else:
+                     mro = t.mro()
+                 lists.append(mro[:-1])  # discard t and object
+             return lists
+
+         def register(*types):
+             """
+             Decorator to register an implementation for the given types
+             """
+             check(types)
+
+             def dec(f):
+                 check(getfullargspec(f).args, operator.lt, ' in ' + f.__name__)
+                 typemap[types] = f
+                 return f
+             return dec
+
+         def dispatch_info(*types):
+             """
+             An utility to introspect the dispatch algorithm
+             """
+             check(types)
+             lst = []
+             for ancs in itertools.product(*ancestors(*types)):
+                 lst.append(tuple(a.__name__ for a in ancs))
+             return lst
+
+         def _dispatch(dispatch_args, *args, **kw):
+             types = tuple(type(arg) for arg in dispatch_args)
+             try:  # fast path
+                 f = typemap[types]
+             except KeyError:
+                 pass
+             else:
+                 return f(*args, **kw)
+             combinations = itertools.product(*ancestors(*types))
+             next(combinations)  # the first one has been already tried
+             for types_ in combinations:
+                 f = typemap.get(types_)
+                 if f is not None:
+                     return f(*args, **kw)
+
+             # else call the default implementation
+             return func(*args, **kw)
+
+         return FunctionMaker.create(
+             func, 'return _f_(%s, %%(shortsignature)s)' % dispatch_str,
+             dict(_f_=_dispatch), register=register, default=func,
+             typemap=typemap, vancestors=vancestors, ancestors=ancestors,
+             dispatch_info=dispatch_info, __wrapped__=func)
+
+     gen_func_dec.__name__ = 'dispatch_on' + dispatch_str
+     return gen_func_dec
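
For context, a minimal sketch of the module's main entry point, decorator(caller), which the docstring above points to; trace and add are hypothetical names. Unlike a plain *args/**kw closure, a wrapper built this way preserves the wrapped function's signature:

    import inspect
    from decorator import decorator

    @decorator
    def trace(func, *args, **kw):
        print("calling %s with args %s, %s" % (func.__name__, args, kw))
        return func(*args, **kw)

    @trace
    def add(x, y):
        return x + y

    print(add(1, 2))               # prints the trace line, then 3
    print(inspect.signature(add))  # (x, y) -- the signature is preserved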
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/distutils-precedence.pth ADDED
@@ -0,0 +1 @@
+ import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/ipykernel_launcher.py ADDED
@@ -0,0 +1,18 @@
+ """Entry point for launching an IPython kernel.
+
+ This is separate from the ipykernel package so we can avoid doing imports until
+ after removing the cwd from sys.path.
+ """
+
+ import sys
+ from pathlib import Path
+
+ if __name__ == "__main__":
+     # Remove the CWD from sys.path while we load stuff.
+     # This is added back by InteractiveShellApp.init_path()
+     if sys.path[0] == "" or Path(sys.path[0]) == Path.cwd():
+         del sys.path[0]
+
+     from ipykernel import kernelapp as app
+
+     app.launch_new_instance()
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/isympy.py ADDED
@@ -0,0 +1,342 @@
+ """
+ Python shell for SymPy.
+
+ This is just a normal Python shell (IPython shell if you have the
+ IPython package installed), that executes the following commands for
+ the user:
+
+ >>> from __future__ import division
+ >>> from sympy import *
+ >>> x, y, z, t = symbols('x y z t')
+ >>> k, m, n = symbols('k m n', integer=True)
+ >>> f, g, h = symbols('f g h', cls=Function)
+ >>> init_printing()
+
+ So starting 'isympy' is equivalent to starting Python (or IPython) and
+ executing the above commands by hand. It is intended for easy and quick
+ experimentation with SymPy. isympy is a good way to use SymPy as an
+ interactive calculator. If you have IPython and Matplotlib installed, then
+ interactive plotting is enabled by default.
+
+ COMMAND LINE OPTIONS
+ --------------------
+
+ -c CONSOLE, --console=CONSOLE
+
+     Use the specified shell (Python or IPython) as the console
+     backend instead of the default one (IPython if present, Python
+     otherwise), e.g.:
+
+         $isympy -c python
+
+     CONSOLE must be one of 'ipython' or 'python'
+
+ -p PRETTY, --pretty PRETTY
+
+     Setup pretty-printing in SymPy. When pretty-printing is enabled,
+     expressions can be printed with Unicode or ASCII. The default is
+     to use pretty-printing (with Unicode if the terminal supports it).
+     When this option is 'no', expressions will not be pretty-printed
+     and ASCII will be used:
+
+         $isympy -p no
+
+     PRETTY must be one of 'unicode', 'ascii', or 'no'
+
+ -t TYPES, --types=TYPES
+
+     Setup the ground types for the polys. By default, gmpy ground types
+     are used if gmpy2 or gmpy is installed, otherwise it falls back to python
+     ground types, which are a little bit slower. You can manually
+     choose python ground types even if gmpy is installed (e.g., for
+     testing purposes):
+
+         $isympy -t python
+
+     TYPES must be one of 'gmpy', 'gmpy1' or 'python'
+
+     Note that the ground type gmpy1 is primarily intended for testing; it
+     forces the use of gmpy version 1 even if gmpy2 is available.
+
+     This is the same as setting the environment variable
+     SYMPY_GROUND_TYPES to the given ground type (e.g.,
+     SYMPY_GROUND_TYPES='gmpy')
+
+     The ground types can be determined interactively from the variable
+     sympy.polys.domains.GROUND_TYPES.
+
+ -o ORDER, --order ORDER
+
+     Setup the ordering of terms for printing. The default is lex, which
+     orders terms lexicographically (e.g., x**2 + x + 1). You can choose
+     other orderings, such as rev-lex, which will use reverse
+     lexicographic ordering (e.g., 1 + x + x**2):
+
+         $isympy -o rev-lex
+
+     ORDER must be one of 'lex', 'rev-lex', 'grlex', 'rev-grlex',
+     'grevlex', 'rev-grevlex', 'old', or 'none'.
+
+     Note that for very large expressions, ORDER='none' may speed up
+     printing considerably but the terms will have no canonical order.
+
+ -q, --quiet
+
+     Print only Python's and SymPy's versions to stdout at startup.
+
+ -d, --doctest
+
+     Use the same format that should be used for doctests. This is
+     equivalent to -c python -p no.
+
+ -C, --no-cache
+
+     Disable the caching mechanism. Disabling the cache may slow certain
+     operations down considerably. This is useful for testing the cache,
+     or for benchmarking, as the cache can result in deceptive timings.
+
+     This is equivalent to setting the environment variable
+     SYMPY_USE_CACHE to 'no'.
+
+ -a, --auto-symbols (requires at least IPython 0.11)
+
+     Automatically create missing symbols. Normally, typing a name of a
+     Symbol that has not been instantiated first would raise NameError,
+     but with this option enabled, any undefined name will be
+     automatically created as a Symbol.
+
+     Note that this is intended only for interactive, calculator style
+     usage. In a script that uses SymPy, Symbols should be instantiated
+     at the top, so that it's clear what they are.
+
+     This will not override any names that are already defined, which
+     includes the single character letters represented by the mnemonic
+     QCOSINE (see the "Gotchas and Pitfalls" document in the
+     documentation). You can delete existing names by executing "del
+     name". If a name is defined, typing "'name' in dir()" will return True.
+
+     The Symbols that are created using this have default assumptions.
+     If you want to place assumptions on symbols, you should create them
+     using symbols() or var().
+
+     Finally, this only works in the top level namespace. So, for
+     example, if you define a function in isympy with an undefined
+     Symbol, it will not work.
+
+     See also the -i and -I options.
+
+ -i, --int-to-Integer (requires at least IPython 0.11)
+
+     Automatically wrap int literals with Integer. This makes it so that
+     things like 1/2 will come out as Rational(1, 2), rather than 0.5. This
+     works by preprocessing the source and wrapping all int literals with
+     Integer. Note that this will not change the behavior of int literals
+     assigned to variables, and it also won't change the behavior of functions
+     that return int literals.
+
+     If you want an int, you can wrap the literal in int(), e.g. int(3)/int(2)
+     gives 1.5 (with division imported from __future__).
+
+ -I, --interactive (requires at least IPython 0.11)
+
+     This is equivalent to --auto-symbols --int-to-Integer. Future options
+     designed for ease of interactive use may be added to this.
+
+ -D, --debug
+
+     Enable debugging output. This is the same as setting the
+     environment variable SYMPY_DEBUG to 'True'. The debug status is set
+     in the variable SYMPY_DEBUG within isympy.
+
+ -- IPython options
+
+     Additionally you can pass command line options directly to the IPython
+     interpreter (the standard Python shell is not supported). However, you
+     need to add the '--' separator between the two types of options, e.g. the
+     startup banner option and the colors option. You need to enter the
+     options as required by the version of IPython that you are using, too:
+
+     in IPython 0.11,
+
+         $isympy -q -- --colors=NoColor
+
+     or in older versions of IPython,
+
+         $isympy -q -- -colors NoColor
+
+ See also isympy --help.
+ """
+
+ import os
+ import sys
+
+ # DO NOT IMPORT SYMPY HERE! Or the setting of the sympy environment variables
+ # by the command line will break.
+
+ def main() -> None:
+     from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+     VERSION = None
+     if '--version' in sys.argv:
+         # We cannot import sympy before this is run, because flags like -C and
+         # -t set environment variables that must be set before SymPy is
+         # imported. The only thing we need to import it for is to get the
+         # version, which only matters with the --version flag.
+         import sympy
+         VERSION = sympy.__version__
+
+     usage = 'isympy [options] -- [ipython options]'
+     parser = ArgumentParser(
+         usage=usage,
+         description=__doc__,
+         formatter_class=RawDescriptionHelpFormatter,
+     )
+
+     parser.add_argument('--version', action='version', version=VERSION)
+
+     parser.add_argument(
+         '-c', '--console',
+         dest='console',
+         action='store',
+         default=None,
+         choices=['ipython', 'python'],
+         metavar='CONSOLE',
+         help='select type of interactive session: ipython | python; defaults '
+         'to ipython if IPython is installed, otherwise python')
+
+     parser.add_argument(
+         '-p', '--pretty',
+         dest='pretty',
+         action='store',
+         default=None,
+         metavar='PRETTY',
+         choices=['unicode', 'ascii', 'no'],
+         help='setup pretty printing: unicode | ascii | no; defaults to '
+         'unicode printing if the terminal supports it, otherwise ascii')
+
+     parser.add_argument(
+         '-t', '--types',
+         dest='types',
+         action='store',
+         default=None,
+         metavar='TYPES',
+         choices=['gmpy', 'gmpy1', 'python'],
+         help='setup ground types: gmpy | gmpy1 | python; defaults to gmpy if gmpy2 '
+         'or gmpy is installed, otherwise python')
+
+     parser.add_argument(
+         '-o', '--order',
+         dest='order',
+         action='store',
+         default=None,
+         metavar='ORDER',
+         choices=['lex', 'grlex', 'grevlex', 'rev-lex', 'rev-grlex', 'rev-grevlex', 'old', 'none'],
+         help='setup ordering of terms: [rev-]lex | [rev-]grlex | [rev-]grevlex | old | none; defaults to lex')
+
+     parser.add_argument(
+         '-q', '--quiet',
+         dest='quiet',
+         action='store_true',
+         default=False,
+         help='print only version information at startup')
+
+     parser.add_argument(
+         '-d', '--doctest',
+         dest='doctest',
+         action='store_true',
+         default=False,
+         help='use the doctest format for output (you can just copy and paste it)')
+
+     parser.add_argument(
+         '-C', '--no-cache',
+         dest='cache',
+         action='store_false',
+         default=True,
+         help='disable caching mechanism')
+
+     parser.add_argument(
+         '-a', '--auto-symbols',
+         dest='auto_symbols',
+         action='store_true',
+         default=False,
+         help='automatically construct missing symbols')
+
+     parser.add_argument(
+         '-i', '--int-to-Integer',
+         dest='auto_int_to_Integer',
+         action='store_true',
+         default=False,
+         help="automatically wrap int literals with Integer")
+
+     parser.add_argument(
+         '-I', '--interactive',
+         dest='interactive',
+         action='store_true',
+         default=False,
+         help="equivalent to -a -i")
+
+     parser.add_argument(
+         '-D', '--debug',
+         dest='debug',
+         action='store_true',
+         default=False,
+         help='enable debugging output')
+
+     (options, ipy_args) = parser.parse_known_args()
+     if '--' in ipy_args:
+         ipy_args.remove('--')
+
+     if not options.cache:
+         os.environ['SYMPY_USE_CACHE'] = 'no'
+
+     if options.types:
+         os.environ['SYMPY_GROUND_TYPES'] = options.types
+
+     if options.debug:
+         os.environ['SYMPY_DEBUG'] = str(options.debug)
+
+     if options.doctest:
+         options.pretty = 'no'
+         options.console = 'python'
+
+     session = options.console
+
+     if session is not None:
+         ipython = session == 'ipython'
+     else:
+         try:
+             import IPython
+             ipython = True
+         except ImportError:
+             if not options.quiet:
+                 from sympy.interactive.session import no_ipython
+                 print(no_ipython)
+             ipython = False
+
+     args = {
+         'pretty_print': True,
+         'use_unicode': None,
+         'use_latex': None,
+         'order': None,
+         'argv': ipy_args,
+     }
+
+     if options.pretty == 'unicode':
+         args['use_unicode'] = True
+     elif options.pretty == 'ascii':
+         args['use_unicode'] = False
+     elif options.pretty == 'no':
+         args['pretty_print'] = False
+
+     if options.order is not None:
+         args['order'] = options.order
+
+     args['quiet'] = options.quiet
+     args['auto_symbols'] = options.auto_symbols or options.interactive
+     args['auto_int_to_Integer'] = options.auto_int_to_Integer or options.interactive
+
+     from sympy.interactive import init_session
+     init_session(ipython, **args)
+
+ if __name__ == "__main__":
+     main()
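Note: per the docstring above, a typical invocation is `isympy -p no -t python -- --colors=NoColor`. The same session can also be started programmatically; a sketch using the `init_session` call that `main()` ends with (keyword names as used above; assumes SymPy is installed):

from sympy.interactive import init_session
init_session(ipython=False, pretty_print=False, argv=[])  # roughly `isympy -c python -p no`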
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jsonpointer.py ADDED
@@ -0,0 +1,348 @@
+ # -*- coding: utf-8 -*-
+ #
+ # python-json-pointer - An implementation of the JSON Pointer syntax
+ # https://github.com/stefankoegl/python-json-pointer
+ #
+ # Copyright (c) 2011 Stefan Kögl <[email protected]>
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions
+ # are met:
+ #
+ # 1. Redistributions of source code must retain the above copyright
+ #    notice, this list of conditions and the following disclaimer.
+ # 2. Redistributions in binary form must reproduce the above copyright
+ #    notice, this list of conditions and the following disclaimer in the
+ #    documentation and/or other materials provided with the distribution.
+ # 3. The name of the author may not be used to endorse or promote products
+ #    derived from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+
+ """ Identify specific nodes in a JSON document (RFC 6901) """
+
+ # Will be parsed by setup.py to determine package metadata
+ __author__ = 'Stefan Kögl <[email protected]>'
+ __version__ = '3.0.0'
+ __website__ = 'https://github.com/stefankoegl/python-json-pointer'
+ __license__ = 'Modified BSD License'
+
+ import copy
+ import re
+ from collections.abc import Mapping, Sequence
+ from itertools import tee, chain
+
+ _nothing = object()
+
+
+ def set_pointer(doc, pointer, value, inplace=True):
+     """Resolves a pointer against doc and sets the value of the target within doc.
+
+     With inplace set to true, doc is modified as long as pointer is not the
+     root.
+
+     >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}}
+
+     >>> set_pointer(obj, '/foo/anArray/0/prop', 55) == \
+     {'foo': {'another prop': {'baz': 'A string'}, 'anArray': [{'prop': 55}]}}
+     True
+
+     >>> set_pointer(obj, '/foo/yet another prop', 'added prop') == \
+     {'foo': {'another prop': {'baz': 'A string'}, 'yet another prop': 'added prop', 'anArray': [{'prop': 55}]}}
+     True
+
+     >>> obj = {'foo': {}}
+     >>> set_pointer(obj, '/foo/a%20b', 'x') == \
+     {'foo': {'a%20b': 'x' }}
+     True
+     """
+
+     pointer = JsonPointer(pointer)
+     return pointer.set(doc, value, inplace)
+
+
+ def resolve_pointer(doc, pointer, default=_nothing):
+     """ Resolves pointer against doc and returns the referenced object
+
+     >>> obj = {'foo': {'anArray': [ {'prop': 44}], 'another prop': {'baz': 'A string' }}, 'a%20b': 1, 'c d': 2}
+
+     >>> resolve_pointer(obj, '') == obj
+     True
+
+     >>> resolve_pointer(obj, '/foo') == obj['foo']
+     True
+
+     >>> resolve_pointer(obj, '/foo/another prop') == obj['foo']['another prop']
+     True
+
+     >>> resolve_pointer(obj, '/foo/another prop/baz') == obj['foo']['another prop']['baz']
+     True
+
+     >>> resolve_pointer(obj, '/foo/anArray/0') == obj['foo']['anArray'][0]
+     True
+
+     >>> resolve_pointer(obj, '/some/path', None) == None
+     True
+
+     >>> resolve_pointer(obj, '/a b', None) == None
+     True
+
+     >>> resolve_pointer(obj, '/a%20b') == 1
+     True
+
+     >>> resolve_pointer(obj, '/c d') == 2
+     True
+
+     >>> resolve_pointer(obj, '/c%20d', None) == None
+     True
+     """
+
+     pointer = JsonPointer(pointer)
+     return pointer.resolve(doc, default)
+
+
+ def pairwise(iterable):
+     """ Transforms a list to a list of tuples of adjacent items
+
+     s -> (s0,s1), (s1,s2), (s2, s3), ...
+
+     >>> list(pairwise([]))
+     []
+
+     >>> list(pairwise([1]))
+     []
+
+     >>> list(pairwise([1, 2, 3, 4]))
+     [(1, 2), (2, 3), (3, 4)]
+     """
+     a, b = tee(iterable)
+     for _ in b:
+         break
+     return zip(a, b)
+
+
+ class JsonPointerException(Exception):
+     pass
+
+
+ class EndOfList(object):
+     """Result of accessing element "-" of a list"""
+
+     def __init__(self, list_):
+         self.list_ = list_
+
+     def __repr__(self):
+         return '{cls}({lst})'.format(cls=self.__class__.__name__,
+                                      lst=repr(self.list_))
+
+
+ class JsonPointer(object):
+     """A JSON Pointer that can reference parts of a JSON document"""
+
+     # Array indices must not contain:
+     # leading zeros, signs, spaces, decimals, etc
+     _RE_ARRAY_INDEX = re.compile('0|[1-9][0-9]*$')
+     _RE_INVALID_ESCAPE = re.compile('(~[^01]|~$)')
+
+     def __init__(self, pointer):
+
+         # validate escapes
+         invalid_escape = self._RE_INVALID_ESCAPE.search(pointer)
+         if invalid_escape:
+             raise JsonPointerException('Found invalid escape {}'.format(
+                 invalid_escape.group()))
+
+         parts = pointer.split('/')
+         if parts.pop(0) != '':
+             raise JsonPointerException('Location must start with /')
+
+         parts = [unescape(part) for part in parts]
+         self.parts = parts
+
+     def to_last(self, doc):
+         """Resolves ptr until the last step, returns (sub-doc, last-step)"""
+
+         if not self.parts:
+             return doc, None
+
+         for part in self.parts[:-1]:
+             doc = self.walk(doc, part)
+
+         return doc, JsonPointer.get_part(doc, self.parts[-1])
+
+     def resolve(self, doc, default=_nothing):
+         """Resolves the pointer against doc and returns the referenced object"""
+
+         for part in self.parts:
+
+             try:
+                 doc = self.walk(doc, part)
+             except JsonPointerException:
+                 if default is _nothing:
+                     raise
+                 else:
+                     return default
+
+         return doc
+
+     get = resolve
+
+     def set(self, doc, value, inplace=True):
+         """Resolve the pointer against the doc and replace the target with value."""
+
+         if len(self.parts) == 0:
+             if inplace:
+                 raise JsonPointerException('Cannot set root in place')
+             return value
+
+         if not inplace:
+             doc = copy.deepcopy(doc)
+
+         (parent, part) = self.to_last(doc)
+
+         if isinstance(parent, Sequence) and part == '-':
+             parent.append(value)
+         else:
+             parent[part] = value
+
+         return doc
+
+     @classmethod
+     def get_part(cls, doc, part):
+         """Returns the next step in the correct type"""
+
+         if isinstance(doc, Mapping):
+             return part
+
+         elif isinstance(doc, Sequence):
+
+             if part == '-':
+                 return part
+
+             if not JsonPointer._RE_ARRAY_INDEX.match(str(part)):
+                 raise JsonPointerException("'%s' is not a valid sequence index" % part)
+
+             return int(part)
+
+         elif hasattr(doc, '__getitem__'):
+             # Allow indexing via ducktyping
+             # if the target has defined __getitem__
+             return part
+
+         else:
+             raise JsonPointerException("Document '%s' does not support indexing, "
+                                        "must be mapping/sequence or support __getitem__" % type(doc))
+
+     def get_parts(self):
+         """Returns the list of the parts. For example, JsonPointer('/a/b').get_parts() == ['a', 'b']"""
+
+         return self.parts
+
+     def walk(self, doc, part):
+         """ Walks one step in doc and returns the referenced part """
+
+         part = JsonPointer.get_part(doc, part)
+
+         assert hasattr(doc, '__getitem__'), "invalid document type %s" % (type(doc),)
+
+         if isinstance(doc, Sequence):
+             if part == '-':
+                 return EndOfList(doc)
+
+             try:
+                 return doc[part]
+
+             except IndexError:
+                 raise JsonPointerException("index '%s' is out of bounds" % (part,))
+
+         # Else the object is a mapping or supports __getitem__
+         # (so assume custom indexing)
+         try:
+             return doc[part]
+
+         except KeyError:
+             raise JsonPointerException("member '%s' not found in %s" % (part, doc))
+
+     def contains(self, ptr):
+         """ Returns True if self contains the given ptr """
+         return self.parts[:len(ptr.parts)] == ptr.parts
+
+     def __contains__(self, item):
+         """ Returns True if self contains the given ptr """
+         return self.contains(item)
+
+     def join(self, suffix):
+         """ Returns a new JsonPointer with the given suffix appended to this ptr """
+         if isinstance(suffix, JsonPointer):
+             suffix_parts = suffix.parts
+         elif isinstance(suffix, str):
+             suffix_parts = JsonPointer(suffix).parts
+         else:
+             suffix_parts = suffix
+         try:
+             return JsonPointer.from_parts(chain(self.parts, suffix_parts))
+         except:  # noqa E722
+             raise JsonPointerException("Invalid suffix")
+
+     def __truediv__(self, suffix):  # Python 3
+         return self.join(suffix)
+
+     @property
+     def path(self):
+         """Returns the string representation of the pointer
+
+         >>> ptr = JsonPointer('/~0/0/~1').path == '/~0/0/~1'
+         """
+         parts = [escape(part) for part in self.parts]
+         return ''.join('/' + part for part in parts)
+
+     def __eq__(self, other):
+         """Compares a pointer to another object
+
+         Pointers can be compared by comparing their strings (or split
+         strings), because no two different parts can point to the same
+         structure in an object (e.g. no different number representations)
+         """
+
+         if not isinstance(other, JsonPointer):
+             return False
+
+         return self.parts == other.parts
+
+     def __hash__(self):
+         return hash(tuple(self.parts))
+
+     def __str__(self):
+         return self.path
+
+     def __repr__(self):
+         return type(self).__name__ + "(" + repr(self.path) + ")"
+
+     @classmethod
+     def from_parts(cls, parts):
+         """Constructs a JsonPointer from a list of (unescaped) paths
+
+         >>> JsonPointer.from_parts(['a', '~', '/', 0]).path == '/a/~0/~1/0'
+         True
+         """
+         parts = [escape(str(part)) for part in parts]
+         ptr = cls(''.join('/' + part for part in parts))
+         return ptr
+
+
+ def escape(s):
+     return s.replace('~', '~0').replace('/', '~1')
+
+
+ def unescape(s):
+     return s.replace('~1', '/').replace('~0', '~')
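Note: a short usage sketch of the API above, following its own doctests (document contents illustrative):

from jsonpointer import JsonPointer, resolve_pointer, set_pointer

doc = {'users': [{'name': 'ada'}]}
assert resolve_pointer(doc, '/users/0/name') == 'ada'

ptr = JsonPointer('/users/0/name')
assert ptr.resolve(doc) == 'ada'

set_pointer(doc, '/users/0/name', 'grace')  # modifies doc in place by default
assert doc['users'][0]['name'] == 'grace'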
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/jupyter.py ADDED
@@ -0,0 +1,7 @@
+ """Launch the root jupyter command"""
+ from __future__ import annotations
+
+ if __name__ == "__main__":
+     from jupyter_core.command import main
+
+     main()
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/nest_asyncio.py ADDED
@@ -0,0 +1,219 @@
+ """Patch asyncio to allow nested event loops."""
+
+ import asyncio
+ import asyncio.events as events
+ import os
+ import sys
+ import threading
+ from contextlib import contextmanager, suppress
+ from heapq import heappop
+
+
+ def apply(loop=None):
+     """Patch asyncio to make its event loop reentrant."""
+     _patch_asyncio()
+     _patch_policy()
+     _patch_tornado()
+
+     loop = loop or asyncio.get_event_loop()
+     _patch_loop(loop)
+
+
+ def _patch_asyncio():
+     """Patch asyncio module to use pure Python tasks and futures."""
+
+     def run(main, *, debug=False):
+         loop = asyncio.get_event_loop()
+         loop.set_debug(debug)
+         task = asyncio.ensure_future(main)
+         try:
+             return loop.run_until_complete(task)
+         finally:
+             if not task.done():
+                 task.cancel()
+                 with suppress(asyncio.CancelledError):
+                     loop.run_until_complete(task)
+
+     def _get_event_loop(stacklevel=3):
+         loop = events._get_running_loop()
+         if loop is None:
+             loop = events.get_event_loop_policy().get_event_loop()
+         return loop
+
+     # Use module level _current_tasks, all_tasks and patch run method.
+     if hasattr(asyncio, '_nest_patched'):
+         return
+     if sys.version_info >= (3, 6, 0):
+         asyncio.Task = asyncio.tasks._CTask = asyncio.tasks.Task = \
+             asyncio.tasks._PyTask
+         asyncio.Future = asyncio.futures._CFuture = asyncio.futures.Future = \
+             asyncio.futures._PyFuture
+     if sys.version_info < (3, 7, 0):
+         asyncio.tasks._current_tasks = asyncio.tasks.Task._current_tasks
+         asyncio.all_tasks = asyncio.tasks.Task.all_tasks
+     if sys.version_info >= (3, 9, 0):
+         events._get_event_loop = events.get_event_loop = \
+             asyncio.get_event_loop = _get_event_loop
+     asyncio.run = run
+     asyncio._nest_patched = True
+
+
+ def _patch_policy():
+     """Patch the policy to always return a patched loop."""
+
+     def get_event_loop(self):
+         if self._local._loop is None:
+             loop = self.new_event_loop()
+             _patch_loop(loop)
+             self.set_event_loop(loop)
+         return self._local._loop
+
+     policy = events.get_event_loop_policy()
+     policy.__class__.get_event_loop = get_event_loop
+
+
+ def _patch_loop(loop):
+     """Patch loop to make it reentrant."""
+
+     def run_forever(self):
+         with manage_run(self), manage_asyncgens(self):
+             while True:
+                 self._run_once()
+                 if self._stopping:
+                     break
+             self._stopping = False
+
+     def run_until_complete(self, future):
+         with manage_run(self):
+             f = asyncio.ensure_future(future, loop=self)
+             if f is not future:
+                 f._log_destroy_pending = False
+             while not f.done():
+                 self._run_once()
+                 if self._stopping:
+                     break
+             if not f.done():
+                 raise RuntimeError(
+                     'Event loop stopped before Future completed.')
+             return f.result()
+
+     def _run_once(self):
+         """
+         Simplified re-implementation of asyncio's _run_once that
+         runs handles as they become ready.
+         """
+         ready = self._ready
+         scheduled = self._scheduled
+         while scheduled and scheduled[0]._cancelled:
+             heappop(scheduled)
+
+         timeout = (
+             0 if ready or self._stopping
+             else min(max(
+                 scheduled[0]._when - self.time(), 0), 86400) if scheduled
+             else None)
+         event_list = self._selector.select(timeout)
+         self._process_events(event_list)
+
+         end_time = self.time() + self._clock_resolution
+         while scheduled and scheduled[0]._when < end_time:
+             handle = heappop(scheduled)
+             ready.append(handle)
+
+         for _ in range(len(ready)):
+             if not ready:
+                 break
+             handle = ready.popleft()
+             if not handle._cancelled:
+                 # preempt the current task so that checks in
+                 # Task.__step do not raise
+                 curr_task = curr_tasks.pop(self, None)
+
+                 try:
+                     handle._run()
+                 finally:
+                     # restore the current task
+                     if curr_task is not None:
+                         curr_tasks[self] = curr_task
+
+         handle = None
+
+     @contextmanager
+     def manage_run(self):
+         """Set up the loop for running."""
+         self._check_closed()
+         old_thread_id = self._thread_id
+         old_running_loop = events._get_running_loop()
+         try:
+             self._thread_id = threading.get_ident()
+             events._set_running_loop(self)
+             self._num_runs_pending += 1
+             if self._is_proactorloop:
+                 if self._self_reading_future is None:
+                     self.call_soon(self._loop_self_reading)
+             yield
+         finally:
+             self._thread_id = old_thread_id
+             events._set_running_loop(old_running_loop)
+             self._num_runs_pending -= 1
+             if self._is_proactorloop:
+                 if (self._num_runs_pending == 0
+                         and self._self_reading_future is not None):
+                     ov = self._self_reading_future._ov
+                     self._self_reading_future.cancel()
+                     if ov is not None:
+                         self._proactor._unregister(ov)
+                     self._self_reading_future = None
+
+     @contextmanager
+     def manage_asyncgens(self):
+         if not hasattr(sys, 'get_asyncgen_hooks'):
+             # Python version is too old.
+             return
+         old_agen_hooks = sys.get_asyncgen_hooks()
+         try:
+             self._set_coroutine_origin_tracking(self._debug)
+             if self._asyncgens is not None:
+                 sys.set_asyncgen_hooks(
+                     firstiter=self._asyncgen_firstiter_hook,
+                     finalizer=self._asyncgen_finalizer_hook)
+             yield
+         finally:
+             self._set_coroutine_origin_tracking(False)
+             if self._asyncgens is not None:
+                 sys.set_asyncgen_hooks(*old_agen_hooks)
+
+     def _check_running(self):
+         """Do not throw exception if loop is already running."""
+         pass
+
+     if hasattr(loop, '_nest_patched'):
+         return
+     if not isinstance(loop, asyncio.BaseEventLoop):
+         raise ValueError('Can\'t patch loop of type %s' % type(loop))
+     cls = loop.__class__
+     cls.run_forever = run_forever
+     cls.run_until_complete = run_until_complete
+     cls._run_once = _run_once
+     cls._check_running = _check_running
+     cls._check_runnung = _check_running  # typo in Python 3.7 source
+     cls._num_runs_pending = 1 if loop.is_running() else 0
+     cls._is_proactorloop = (
+         os.name == 'nt' and issubclass(cls, asyncio.ProactorEventLoop))
+     if sys.version_info < (3, 7, 0):
+         cls._set_coroutine_origin_tracking = cls._set_coroutine_wrapper
+     curr_tasks = asyncio.tasks._current_tasks \
+         if sys.version_info >= (3, 7, 0) else asyncio.Task._current_tasks
+     cls._nest_patched = True
+
+
+ def _patch_tornado():
+     """
+     If tornado is imported before nest_asyncio, make tornado aware of
+     the pure-Python asyncio Future.
+     """
+     if 'tornado' in sys.modules:
+         import tornado.concurrent as tc  # type: ignore
+         tc.Future = asyncio.Future
+         if asyncio.Future not in tc.FUTURES:
+             tc.FUTURES += (asyncio.Future,)
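Note: the module's documented use is to make an already-running loop reentrant, e.g. inside a Jupyter cell. A minimal sketch of that pattern:

import asyncio
import nest_asyncio

nest_asyncio.apply()  # patch the current loop to allow nested run calls

async def answer():
    return 42

# Inside an environment with a running loop, asyncio.run() would raise
# RuntimeError without the patch above.
print(asyncio.run(answer()))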
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pandocfilters.py ADDED
@@ -0,0 +1,304 @@
+ # Author: John MacFarlane <[email protected]>
+ # Copyright: (C) 2013 John MacFarlane
+ # License: BSD3
+
+ """
+ Functions to aid writing python scripts that process the pandoc
+ AST serialized as JSON.
+ """
+
+ import codecs
+ import hashlib
+ import io
+ import json
+ import os
+ import sys
+ import atexit
+ import shutil
+ import tempfile
+
+
+ # some utility functions: make it easier to create your own filters
+
+
+ def get_filename4code(module, content, ext=None):
+     """Generate filename based on content
+
+     The function ensures that the (temporary) directory exists, so that the
+     file can be written.
+
+     By default, the directory won't be cleaned up,
+     so a filter can use the directory as a cache and
+     decide not to regenerate if there's no change.
+
+     In case the user prefers the files to be temporary files,
+     an environment variable `PANDOCFILTER_CLEANUP` can be set to
+     any non-empty value such as `1` to
+     make sure the directory is created in a temporary location and removed
+     after finishing the filter. In this case there's no caching and files
+     will be regenerated each time the filter is run.
+
+     Example:
+         filename = get_filename4code("myfilter", code)
+     """
+     if os.getenv('PANDOCFILTER_CLEANUP'):
+         imagedir = tempfile.mkdtemp(prefix=module)
+         atexit.register(lambda: shutil.rmtree(imagedir))
+     else:
+         imagedir = module + "-images"
+     fn = hashlib.sha1(content.encode(sys.getfilesystemencoding())).hexdigest()
+     try:
+         os.makedirs(imagedir, exist_ok=True)
+         sys.stderr.write('Created directory ' + imagedir + '\n')
+     except OSError:
+         sys.stderr.write('Could not create directory "' + imagedir + '"\n')
+     if ext:
+         fn += "." + ext
+     return os.path.join(imagedir, fn)
+
+
+ def get_value(kv, key, value=None):
+     """get value from the keyvalues (options)"""
+     res = []
+     for k, v in kv:
+         if k == key:
+             value = v
+         else:
+             res.append([k, v])
+     return value, res
+
+
+ def get_caption(kv):
+     """get caption from the keyvalues (options)
+
+     Example:
+         if key == 'CodeBlock':
+             [[ident, classes, keyvals], code] = value
+             caption, typef, keyvals = get_caption(keyvals)
+             ...
+             return Para([Image([ident, [], keyvals], caption, [filename, typef])])
+     """
+     caption = []
+     typef = ""
+     value, res = get_value(kv, u"caption")
+     if value is not None:
+         caption = [Str(value)]
+         typef = "fig:"
+
+     return caption, typef, res
+
+
+ def get_extension(format, default, **alternates):
+     """get the extension for the result, needs a default and some specialisations
+
+     Example:
+         filetype = get_extension(format, "png", html="svg", latex="eps")
+     """
+     try:
+         return alternates[format]
+     except KeyError:
+         return default
+
+ # end of utilities
+
+
+ def walk(x, action, format, meta):
+     """Walk a tree, applying an action to every object.
+     Returns a modified tree. An action is a function of the form
+     `action(key, value, format, meta)`, where:
+
+     * `key` is the type of the pandoc object (e.g. 'Str', 'Para')
+     * `value` is the contents of the object (e.g. a string for 'Str',
+       a list of inline elements for 'Para')
+     * `format` is the target output format (as supplied by the
+       `format` argument of `walk`)
+     * `meta` is the document's metadata
+
+     The return of an action is either:
+
+     * `None`: this means that the object should remain unchanged
+     * a pandoc object: this will replace the original object
+     * a list of pandoc objects: these will replace the original object; the
+       list is merged with the neighbors of the original objects (spliced into
+       the list the original object belongs to); returning an empty list deletes
+       the object
+     """
+     if isinstance(x, list):
+         array = []
+         for item in x:
+             if isinstance(item, dict) and 't' in item:
+                 res = action(item['t'],
+                              item['c'] if 'c' in item else None, format, meta)
+                 if res is None:
+                     array.append(walk(item, action, format, meta))
+                 elif isinstance(res, list):
+                     for z in res:
+                         array.append(walk(z, action, format, meta))
+                 else:
+                     array.append(walk(res, action, format, meta))
+             else:
+                 array.append(walk(item, action, format, meta))
+         return array
+     elif isinstance(x, dict):
+         return {k: walk(v, action, format, meta) for k, v in x.items()}
+     else:
+         return x
+
+
+ def toJSONFilter(action):
+     """Like `toJSONFilters`, but takes a single action as argument.
+     """
+     toJSONFilters([action])
+
+
+ def toJSONFilters(actions):
+     """Generate a JSON-to-JSON filter from stdin to stdout
+
+     The filter:
+
+     * reads a JSON-formatted pandoc document from stdin
+     * transforms it by walking the tree and performing the actions
+     * returns a new JSON-formatted pandoc document to stdout
+
+     The argument `actions` is a list of functions of the form
+     `action(key, value, format, meta)`, as described in more
+     detail under `walk`.
+
+     This function calls `applyJSONFilters`, with the `format`
+     argument provided by the first command-line argument,
+     if present. (Pandoc sets this by default when calling
+     filters.)
+     """
+     try:
+         input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
+     except AttributeError:
+         # Python 2 does not have sys.stdin.buffer.
+         # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
+         input_stream = codecs.getreader("utf-8")(sys.stdin)
+
+     source = input_stream.read()
+     if len(sys.argv) > 1:
+         format = sys.argv[1]
+     else:
+         format = ""
+
+     sys.stdout.write(applyJSONFilters(actions, source, format))
+
+
+ def applyJSONFilters(actions, source, format=""):
+     """Walk through JSON structure and apply filters
+
+     This:
+
+     * reads a JSON-formatted pandoc document from a source string
+     * transforms it by walking the tree and performing the actions
+     * returns a new JSON-formatted pandoc document as a string
+
+     The `actions` argument is a list of functions (see `walk`
+     for a full description).
+
+     The argument `source` is a string encoded JSON object.
+
+     The argument `format` is a string describing the output format.
+
+     Returns the new JSON-formatted pandoc document.
+     """
+
+     doc = json.loads(source)
+
+     if 'meta' in doc:
+         meta = doc['meta']
+     elif doc[0]:  # old API
+         meta = doc[0]['unMeta']
+     else:
+         meta = {}
+     altered = doc
+     for action in actions:
+         altered = walk(altered, action, format, meta)
+
+     return json.dumps(altered)
+
+
+ def stringify(x):
+     """Walks the tree x and returns concatenated string content,
+     leaving out all formatting.
+     """
+     result = []
+
+     def go(key, val, format, meta):
+         if key in ['Str', 'MetaString']:
+             result.append(val)
+         elif key == 'Code':
+             result.append(val[1])
+         elif key == 'Math':
+             result.append(val[1])
+         elif key == 'LineBreak':
+             result.append(" ")
+         elif key == 'SoftBreak':
+             result.append(" ")
+         elif key == 'Space':
+             result.append(" ")
+
+     walk(x, go, "", {})
+     return ''.join(result)
+
+
+ def attributes(attrs):
+     """Returns an attribute list, constructed from the
+     dictionary attrs.
+     """
+     attrs = attrs or {}
+     ident = attrs.get("id", "")
+     classes = attrs.get("classes", [])
+     keyvals = [[x, attrs[x]] for x in attrs if (x != "classes" and x != "id")]
+     return [ident, classes, keyvals]
+
+
+ def elt(eltType, numargs):
+     def fun(*args):
+         lenargs = len(args)
+         if lenargs != numargs:
+             raise ValueError(eltType + ' expects ' + str(numargs) +
+                              ' arguments, but given ' + str(lenargs))
+         if numargs == 0:
+             xs = []
+         elif len(args) == 1:
+             xs = args[0]
+         else:
+             xs = list(args)
+         return {'t': eltType, 'c': xs}
+     return fun
+
+ # Constructors for block elements
+
+ Plain = elt('Plain', 1)
+ Para = elt('Para', 1)
+ CodeBlock = elt('CodeBlock', 2)
+ RawBlock = elt('RawBlock', 2)
+ BlockQuote = elt('BlockQuote', 1)
+ OrderedList = elt('OrderedList', 2)
+ BulletList = elt('BulletList', 1)
+ DefinitionList = elt('DefinitionList', 1)
+ Header = elt('Header', 3)
+ HorizontalRule = elt('HorizontalRule', 0)
+ Table = elt('Table', 5)
+ Div = elt('Div', 2)
+ Null = elt('Null', 0)
+
+ # Constructors for inline elements
+
+ Str = elt('Str', 1)
+ Emph = elt('Emph', 1)
+ Strong = elt('Strong', 1)
+ Strikeout = elt('Strikeout', 1)
+ Superscript = elt('Superscript', 1)
+ Subscript = elt('Subscript', 1)
+ SmallCaps = elt('SmallCaps', 1)
+ Quoted = elt('Quoted', 2)
+ Cite = elt('Cite', 2)
+ Code = elt('Code', 2)
+ Space = elt('Space', 0)
+ LineBreak = elt('LineBreak', 0)
+ Math = elt('Math', 2)
+ RawInline = elt('RawInline', 2)
+ Link = elt('Link', 3)
+ Image = elt('Image', 3)
+ Note = elt('Note', 1)
+ SoftBreak = elt('SoftBreak', 0)
+ Span = elt('Span', 2)
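Note: a minimal filter built from the helpers above, following the `walk`/`toJSONFilter` docstrings: it upper-cases every Str node. Saved as caps.py (file name illustrative), it would be run as `pandoc input.md --filter ./caps.py`:

from pandocfilters import toJSONFilter, Str

def caps(key, value, format, meta):
    if key == 'Str':
        return Str(value.upper())  # replace the node; returning None leaves it unchanged

if __name__ == "__main__":
    toJSONFilter(caps)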
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/pip-22.0.2.virtualenv ADDED
File without changes
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/protobuf-3.20.3-py3.10-nspkg.pth ADDED
@@ -0,0 +1 @@
+ import sys, types, os;has_mfs = sys.version_info > (3, 5);p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('google',));importlib = has_mfs and __import__('importlib.util');has_mfs and __import__('importlib.machinery');m = has_mfs and sys.modules.setdefault('google', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('google', [os.path.dirname(p)])));m = m or sys.modules.setdefault('google', types.ModuleType('google'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__config__.py ADDED
@@ -0,0 +1,161 @@
+ # This file is generated by SciPy's build process
+ # It contains system_info results at the time of building this package.
+ from enum import Enum
+
+ __all__ = ["show"]
+ _built_with_meson = True
+
+
+ class DisplayModes(Enum):
+     stdout = "stdout"
+     dicts = "dicts"
+
+
+ def _cleanup(d):
+     """
+     Removes empty values in a `dict` recursively
+     This ensures we remove values that Meson could not provide to CONFIG
+     """
+     if isinstance(d, dict):
+         return { k: _cleanup(v) for k, v in d.items() if v != '' and _cleanup(v) != '' }
+     else:
+         return d
+
+
+ CONFIG = _cleanup(
+     {
+         "Compilers": {
+             "c": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"cc",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "cython": {
+                 "name": r"cython",
+                 "linker": r"cython",
+                 "version": r"3.0.11",
+                 "commands": r"cython",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "c++": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"c++",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "fortran": {
+                 "name": "gcc",
+                 "linker": r"ld.bfd",
+                 "version": "10.2.1",
+                 "commands": r"gfortran",
+                 "args": r"",
+                 "linker args": r"",
+             },
+             "pythran": {
+                 "version": r"0.16.1",
+                 "include directory": r"../../tmp/pip-build-env-h_xz8lfs/overlay/lib/python3.10/site-packages/pythran"
+             },
+         },
+         "Machine Information": {
+             "host": {
+                 "cpu": r"x86_64",
+                 "family": r"x86_64",
+                 "endian": r"little",
+                 "system": r"linux",
+             },
+             "build": {
+                 "cpu": r"x86_64",
+                 "family": r"x86_64",
+                 "endian": r"little",
+                 "system": r"linux",
+             },
+             "cross-compiled": bool("False".lower().replace('false', '')),
+         },
+         "Build Dependencies": {
+             "blas": {
+                 "name": "scipy-openblas",
+                 "found": bool("True".lower().replace('false', '')),
+                 "version": "0.3.27.dev",
+                 "detection method": "pkgconfig",
+                 "include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
+                 "lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
+                 "openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
+                 "pc file directory": r"/project",
+             },
+             "lapack": {
+                 "name": "scipy-openblas",
+                 "found": bool("True".lower().replace('false', '')),
+                 "version": "0.3.27.dev",
+                 "detection method": "pkgconfig",
+                 "include directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/include",
+                 "lib directory": r"/opt/_internal/cpython-3.10.14/lib/python3.10/site-packages/scipy_openblas32/lib",
+                 "openblas configuration": r"OpenBLAS 0.3.27.dev DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=64",
+                 "pc file directory": r"/project",
+             },
+             "pybind11": {
+                 "name": "pybind11",
+                 "version": "2.12.0",
+                 "detection method": "config-tool",
+                 "include directory": r"unknown",
+             },
+         },
+         "Python Information": {
+             "path": r"/opt/python/cp310-cp310/bin/python",
+             "version": "3.10",
+         },
+     }
+ )
+
+
+ def _check_pyyaml():
+     import yaml
+
+     return yaml
+
+
+ def show(mode=DisplayModes.stdout.value):
+     """
+     Show libraries and system information on which SciPy was built
+     and is being used
+
+     Parameters
+     ----------
+     mode : {`'stdout'`, `'dicts'`}, optional.
+         Indicates how to display the config information.
+         `'stdout'` prints to console, `'dicts'` returns a dictionary
+         of the configuration.
+
+     Returns
+     -------
+     out : {`dict`, `None`}
+         If mode is `'dicts'`, a dict is returned, else None
+
+     Notes
+     -----
+     1. The `'stdout'` mode will give more readable
+        output if ``pyyaml`` is installed
+
+     """
+     if mode == DisplayModes.stdout.value:
+         try:  # Non-standard library, check import
+             yaml = _check_pyyaml()
+
+             print(yaml.dump(CONFIG))
+         except ModuleNotFoundError:
+             import warnings
+             import json
+
+             warnings.warn("Install `pyyaml` for better output", stacklevel=1)
+             print(json.dumps(CONFIG, indent=2))
+     elif mode == DisplayModes.dicts.value:
+         return CONFIG
+     else:
+         raise AttributeError(
+             f"Invalid `mode`, use one of: {', '.join([e.value for e in DisplayModes])}"
+         )
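Note: the CONFIG recorded above is what `scipy.show_config()` reports; a quick sketch of inspecting it:

import scipy

scipy.show_config()                    # pretty-printed (YAML when pyyaml is installed)
cfg = scipy.show_config(mode="dicts")  # returns the CONFIG dict instead of printing
print(cfg["Build Dependencies"]["blas"]["name"])  # 'scipy-openblas' in this build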
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/__init__.py ADDED
@@ -0,0 +1,141 @@
+ """
+ SciPy: A scientific computing package for Python
+ ================================================
+
+ Documentation is available in the docstrings and
+ online at https://docs.scipy.org.
+
+ Subpackages
+ -----------
+ Using any of these subpackages requires an explicit import. For example,
+ ``import scipy.cluster``.
+
+ ::
+
+  cluster                      --- Vector Quantization / Kmeans
+  constants                    --- Physical and mathematical constants and units
+  datasets                     --- Dataset methods
+  fft                          --- Discrete Fourier transforms
+  fftpack                      --- Legacy discrete Fourier transforms
+  integrate                    --- Integration routines
+  interpolate                  --- Interpolation Tools
+  io                           --- Data input and output
+  linalg                       --- Linear algebra routines
+  misc                         --- Utilities that don't have another home.
+  ndimage                      --- N-D image package
+  odr                          --- Orthogonal Distance Regression
+  optimize                     --- Optimization Tools
+  signal                       --- Signal Processing Tools
+  sparse                       --- Sparse Matrices
+  spatial                      --- Spatial data structures and algorithms
+  special                      --- Special functions
+  stats                        --- Statistical Functions
+
+ Public API in the main SciPy namespace
+ --------------------------------------
+ ::
+
+  __version__       --- SciPy version string
+  LowLevelCallable  --- Low-level callback function
+  show_config       --- Show scipy build configuration
+  test              --- Run scipy unittests
+
+ """
+
+ import importlib as _importlib
+
+ from numpy import __version__ as __numpy_version__
+
+
+ try:
+     from scipy.__config__ import show as show_config
+ except ImportError as e:
+     msg = """Error importing SciPy: you cannot import SciPy while
+     being in scipy source directory; please exit the SciPy source
+     tree first and relaunch your Python interpreter."""
+     raise ImportError(msg) from e
+
+
+ from scipy.version import version as __version__
+
+
+ # Allow distributors to run custom init code
+ from . import _distributor_init
+ del _distributor_init
+
+
+ from scipy._lib import _pep440
+ # In maintenance branch, change to np_maxversion N+3 if numpy is at N
+ np_minversion = '1.23.5'
+ np_maxversion = '2.3.0'
+ if (_pep440.parse(__numpy_version__) < _pep440.Version(np_minversion) or
+         _pep440.parse(__numpy_version__) >= _pep440.Version(np_maxversion)):
+     import warnings
+     warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
+                   f" is required for this version of SciPy (detected "
+                   f"version {__numpy_version__})",
+                   UserWarning, stacklevel=2)
+ del _pep440
+
+
+ # This is the first import of an extension module within SciPy. If there's
+ # a general issue with the install, such that extension modules are missing
+ # or cannot be imported, this is where we'll get a failure - so give an
+ # informative error message.
+ try:
+     from scipy._lib._ccallback import LowLevelCallable
+ except ImportError as e:
+     msg = "The `scipy` install you are using seems to be broken, " + \
+           "(extension modules cannot be imported), " + \
+           "please try reinstalling."
+     raise ImportError(msg) from e
+
+
+ from scipy._lib._testutils import PytestTester
+ test = PytestTester(__name__)
+ del PytestTester
+
+
+ submodules = [
+     'cluster',
+     'constants',
+     'datasets',
+     'fft',
+     'fftpack',
+     'integrate',
+     'interpolate',
+     'io',
+     'linalg',
+     'misc',
+     'ndimage',
+     'odr',
+     'optimize',
+     'signal',
+     'sparse',
+     'spatial',
+     'special',
+     'stats'
+ ]
+
+ __all__ = submodules + [
+     'LowLevelCallable',
+     'test',
+     'show_config',
+     '__version__',
+ ]
+
+
+ def __dir__():
+     return __all__
+
+
+ def __getattr__(name):
+     if name in submodules:
+         return _importlib.import_module(f'scipy.{name}')
+     else:
+         try:
+             return globals()[name]
+         except KeyError:
+             raise AttributeError(
+                 f"Module 'scipy' has no attribute '{name}'"
+             )
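Note: the module-level `__getattr__` above implements lazy submodule loading: subpackages are imported on first attribute access rather than at `import scipy`. For example:

import scipy

opt = scipy.optimize   # triggers importlib.import_module('scipy.optimize') here
print(scipy.__version__)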
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/_distributor_init.py ADDED
@@ -0,0 +1,18 @@
+ """ Distributor init file
+
+ Distributors: you can replace the contents of this file with your own custom
+ code to support particular distributions of SciPy.
+
+ For example, this is a good place to put any checks for hardware requirements
+ or BLAS/LAPACK library initialization.
+
+ The SciPy standard source distribution will not put code in this file beyond
+ the try-except import of `_distributor_init_local` (which is not part of a
+ standard source distribution), so you can safely replace this file with your
+ own version.
+ """
+
+ try:
+     from . import _distributor_init_local  # noqa: F401
+ except ImportError:
+     pass
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/conftest.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pytest customization
2
+ import json
3
+ import os
4
+ import warnings
5
+ import tempfile
6
+ from contextlib import contextmanager
7
+
8
+ import numpy as np
9
+ import numpy.testing as npt
10
+ import pytest
11
+ import hypothesis
12
+
13
+ from scipy._lib._fpumode import get_fpu_mode
14
+ from scipy._lib._testutils import FPUModeChangeWarning
15
+ from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_DEVICE
16
+ from scipy._lib import _pep440
17
+
18
+ try:
19
+ from scipy_doctest.conftest import dt_config
20
+ HAVE_SCPDT = True
21
+ except ModuleNotFoundError:
22
+ HAVE_SCPDT = False
23
+
24
+
25
+ def pytest_configure(config):
26
+ config.addinivalue_line("markers",
27
+ "slow: Tests that are very slow.")
28
+ config.addinivalue_line("markers",
29
+ "xslow: mark test as extremely slow (not run unless explicitly requested)")
30
+ config.addinivalue_line("markers",
31
+ "xfail_on_32bit: mark test as failing on 32-bit platforms")
32
+ try:
33
+ import pytest_timeout # noqa:F401
34
+ except Exception:
35
+ config.addinivalue_line(
36
+ "markers", 'timeout: mark a test for a non-default timeout')
37
+ try:
38
+ # This is a more reliable test of whether pytest_fail_slow is installed
39
+ # When I uninstalled it, `import pytest_fail_slow` didn't fail!
40
+ from pytest_fail_slow import parse_duration # type: ignore[import-not-found] # noqa:F401,E501
41
+ except Exception:
42
+ config.addinivalue_line(
43
+ "markers", 'fail_slow: mark a test for a non-default timeout failure')
44
+ config.addinivalue_line("markers",
45
+ "skip_xp_backends(*backends, reasons=None, np_only=False, cpu_only=False): "
46
+ "mark the desired skip configuration for the `skip_xp_backends` fixture.")
47
+
48
+
49
+ def pytest_runtest_setup(item):
50
+ mark = item.get_closest_marker("xslow")
51
+ if mark is not None:
52
+ try:
53
+ v = int(os.environ.get('SCIPY_XSLOW', '0'))
54
+ except ValueError:
55
+ v = False
56
+ if not v:
57
+ pytest.skip("very slow test; "
58
+ "set environment variable SCIPY_XSLOW=1 to run it")
59
+ mark = item.get_closest_marker("xfail_on_32bit")
60
+ if mark is not None and np.intp(0).itemsize < 8:
61
+ pytest.xfail(f'Fails on our 32-bit test platform(s): {mark.args[0]}')
62
+
63
+ # Older versions of threadpoolctl have an issue that may lead to this
64
+ # warning being emitted, see gh-14441
65
+ with npt.suppress_warnings() as sup:
66
+ sup.filter(pytest.PytestUnraisableExceptionWarning)
67
+
68
+ try:
69
+ from threadpoolctl import threadpool_limits
70
+
71
+ HAS_THREADPOOLCTL = True
72
+ except Exception: # observed in gh-14441: (ImportError, AttributeError)
73
+ # Optional dependency only. All exceptions are caught, for robustness
74
+ HAS_THREADPOOLCTL = False
75
+
76
+ if HAS_THREADPOOLCTL:
77
+ # Set the number of openmp threads based on the number of workers
78
+ # xdist is using to prevent oversubscription. Simplified version of what
79
+ # sklearn does (it can rely on threadpoolctl and its builtin OpenMP helper
80
+ # functions)
81
+ try:
82
+ xdist_worker_count = int(os.environ['PYTEST_XDIST_WORKER_COUNT'])
83
+ except KeyError:
84
+ # raises when pytest-xdist is not installed
85
+ return
86
+
87
+ if not os.getenv('OMP_NUM_THREADS'):
88
+ max_openmp_threads = os.cpu_count() // 2 # use nr of physical cores
89
+ threads_per_worker = max(max_openmp_threads // xdist_worker_count, 1)
90
+ try:
91
+ threadpool_limits(threads_per_worker, user_api='blas')
92
+ except Exception:
93
+ # May raise AttributeError for older versions of OpenBLAS.
94
+ # Catch any error for robustness.
95
+ return
96
+
97
+
98
+ @pytest.fixture(scope="function", autouse=True)
99
+ def check_fpu_mode(request):
100
+ """
101
+ Check FPU mode was not changed during the test.
102
+ """
103
+ old_mode = get_fpu_mode()
104
+ yield
105
+ new_mode = get_fpu_mode()
106
+
107
+ if old_mode != new_mode:
108
+ warnings.warn(f"FPU mode changed from {old_mode:#x} to {new_mode:#x} during "
109
+ "the test",
110
+ category=FPUModeChangeWarning, stacklevel=0)
111
+
112
+
113
+ # Array API backend handling
114
+ xp_available_backends = {'numpy': np}
115
+
116
+ if SCIPY_ARRAY_API and isinstance(SCIPY_ARRAY_API, str):
117
+ # fill the dict of backends with available libraries
118
+ try:
119
+ import array_api_strict
120
+ xp_available_backends.update({'array_api_strict': array_api_strict})
121
+ if _pep440.parse(array_api_strict.__version__) < _pep440.Version('2.0'):
122
+ raise ImportError("array-api-strict must be >= version 2.0")
123
+ array_api_strict.set_array_api_strict_flags(
124
+ api_version='2023.12'
125
+ )
126
+ except ImportError:
127
+ pass
128
+
129
+ try:
130
+ import torch # type: ignore[import-not-found]
131
+ xp_available_backends.update({'pytorch': torch})
132
+ # can use `mps` or `cpu`
133
+ torch.set_default_device(SCIPY_DEVICE)
134
+ except ImportError:
135
+ pass
136
+
137
+ try:
138
+ import cupy # type: ignore[import-not-found]
139
+ xp_available_backends.update({'cupy': cupy})
140
+ except ImportError:
141
+ pass
142
+
143
+ try:
144
+ import jax.numpy # type: ignore[import-not-found]
145
+ xp_available_backends.update({'jax.numpy': jax.numpy})
146
+ jax.config.update("jax_enable_x64", True)
147
+ jax.config.update("jax_default_device", jax.devices(SCIPY_DEVICE)[0])
148
+ except ImportError:
149
+ pass
150
+
151
+ # by default, use all available backends
152
+ if SCIPY_ARRAY_API.lower() not in ("1", "true"):
153
+ SCIPY_ARRAY_API_ = json.loads(SCIPY_ARRAY_API)
154
+
155
+ if 'all' in SCIPY_ARRAY_API_:
156
+ pass # same as True
157
+ else:
158
+ # only select a subset of backend by filtering out the dict
159
+ try:
160
+ xp_available_backends = {
161
+ backend: xp_available_backends[backend]
162
+ for backend in SCIPY_ARRAY_API_
163
+ }
164
+ except KeyError:
165
+ msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}"
166
+ raise ValueError(msg)
167
+
168
+ if 'cupy' in xp_available_backends:
169
+ SCIPY_DEVICE = 'cuda'
170
+
171
+ array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values())
172
+
173
+ skip_xp_invalid_arg = pytest.mark.skipif(SCIPY_ARRAY_API,
174
+ reason = ('Test involves masked arrays, object arrays, or other types '
175
+ 'that are not valid input when `SCIPY_ARRAY_API` is used.'))
176
+
177
+
178
+ @pytest.fixture
179
+ def skip_xp_backends(xp, request):
180
+ """
181
+ Skip based on the ``skip_xp_backends`` marker.
182
+
183
+ Parameters
184
+ ----------
185
+ *backends : tuple
186
+ Backends to skip, e.g. ``("array_api_strict", "torch")``.
187
+ These are overridden when ``np_only`` is ``True``, and are not
188
+ necessary to provide for non-CPU backends when ``cpu_only`` is ``True``.
189
+ reasons : list, optional
190
+ A list of reasons for each skip. When ``np_only`` is ``True``,
191
+ this should be a singleton list. Otherwise, this should be a list
192
+ of reasons, one for each corresponding backend in ``backends``.
193
+ If unprovided, default reasons are used. Note that it is not possible
194
+ to specify a custom reason with ``cpu_only``. Default: ``None``.
195
+ np_only : bool, optional
196
+ When ``True``, the test is skipped for all backends other
197
+ than the default NumPy backend. There is no need to provide
198
+ any ``backends`` in this case. To specify a reason, pass a
199
+ singleton list to ``reasons``. Default: ``False``.
200
+ cpu_only : bool, optional
201
+ When ``True``, the test is skipped on non-CPU devices.
202
+ There is no need to provide any ``backends`` in this case,
203
+ but any ``backends`` will also be skipped on the CPU.
204
+ Default: ``False``.
205
+ """
206
+ if "skip_xp_backends" not in request.keywords:
207
+ return
208
+ backends = request.keywords["skip_xp_backends"].args
209
+ kwargs = request.keywords["skip_xp_backends"].kwargs
210
+ np_only = kwargs.get("np_only", False)
211
+ cpu_only = kwargs.get("cpu_only", False)
212
+ if np_only:
213
+ reasons = kwargs.get("reasons", ["do not run with non-NumPy backends."])
214
+ reason = reasons[0]
215
+ if xp.__name__ != 'numpy':
216
+ pytest.skip(reason=reason)
217
+ return
218
+ if cpu_only:
219
+ reason = "do not run with `SCIPY_ARRAY_API` set and not on CPU"
220
+ if SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu':
221
+ if xp.__name__ == 'cupy':
222
+ pytest.skip(reason=reason)
223
+ elif xp.__name__ == 'torch':
224
+ if 'cpu' not in xp.empty(0).device.type:
225
+ pytest.skip(reason=reason)
226
+ elif xp.__name__ == 'jax.numpy':
227
+ for d in xp.empty(0).devices():
228
+ if 'cpu' not in d.device_kind:
229
+ pytest.skip(reason=reason)
230
+
231
+ if backends is not None:
232
+ reasons = kwargs.get("reasons", False)
233
+ for i, backend in enumerate(backends):
234
+ if xp.__name__ == backend:
235
+ if not reasons:
236
+ reason = f"do not run with array API backend: {backend}"
237
+ else:
238
+ reason = reasons[i]
239
+ pytest.skip(reason=reason)
240
+
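The fixture above is driven by the ``skip_xp_backends`` marker documented in its docstring. A minimal sketch of a test module consuming it follows; the test body and skip reason are hypothetical, while the ``array_api_compatible`` marker and the ``usefixtures`` pattern come from this conftest:

import pytest
from scipy.conftest import array_api_compatible

@array_api_compatible              # parametrizes the test over every available `xp` backend
@pytest.mark.usefixtures("skip_xp_backends")
@pytest.mark.skip_xp_backends('array_api_strict',
                              reasons=['hypothetical: example skip reason'])
def test_asarray_roundtrip(xp):
    # `xp` is whichever backend module (numpy, torch, cupy, ...) is active.
    a = xp.asarray([1.0, 2.0, 3.0])
    assert float(a[0]) == 1.0

Tests marked ``xslow`` follow the same opt-in pattern: they run only when ``SCIPY_XSLOW=1`` is set in the environment, as implemented in ``pytest_runtest_setup`` above.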
241
+
242
+ # Following the approach of NumPy's conftest.py...
243
+ # Use a known and persistent tmpdir for hypothesis' caches, which
244
+ # can be automatically cleared by the OS or user.
245
+ hypothesis.configuration.set_hypothesis_home_dir(
246
+ os.path.join(tempfile.gettempdir(), ".hypothesis")
247
+ )
248
+
249
+ # We register two custom profiles for SciPy - for details see
250
+ # https://hypothesis.readthedocs.io/en/latest/settings.html
251
+ # The first is designed for our own CI runs; the latter also
252
+ # forces determinism and is designed for use via scipy.test()
253
+ hypothesis.settings.register_profile(
254
+ name="nondeterministic", deadline=None, print_blob=True,
255
+ )
256
+ hypothesis.settings.register_profile(
257
+ name="deterministic",
258
+ deadline=None, print_blob=True, database=None, derandomize=True,
259
+ suppress_health_check=list(hypothesis.HealthCheck),
260
+ )
261
+
262
+ # Profile is currently set by environment variable `SCIPY_HYPOTHESIS_PROFILE`
263
+ # In the future, it would be good to work the choice into dev.py.
264
+ SCIPY_HYPOTHESIS_PROFILE = os.environ.get("SCIPY_HYPOTHESIS_PROFILE",
265
+ "deterministic")
266
+ hypothesis.settings.load_profile(SCIPY_HYPOTHESIS_PROFILE)
267
+
268
+
269
+ ############################################################################
270
+ # doctesting stuff
271
+
272
+ if HAVE_SCPDT:
273
+
274
+ # FIXME: populate the dict once
275
+ @contextmanager
276
+ def warnings_errors_and_rng(test=None):
277
+ """Temporarily turn (almost) all warnings to errors.
278
+
279
+ Filter out known warnings which we allow.
280
+ """
281
+ known_warnings = dict()
282
+
283
+ # these functions are known to emit "divide by zero" RuntimeWarnings
284
+ divide_by_zero = [
285
+ 'scipy.linalg.norm', 'scipy.ndimage.center_of_mass',
286
+ ]
287
+ for name in divide_by_zero:
288
+ known_warnings[name] = dict(category=RuntimeWarning,
289
+ message='divide by zero')
290
+
291
+ # Deprecated stuff in scipy.signal and elsewhere
292
+ deprecated = [
293
+ 'scipy.signal.cwt', 'scipy.signal.morlet', 'scipy.signal.morlet2',
294
+ 'scipy.signal.ricker',
295
+ 'scipy.integrate.simpson',
296
+ 'scipy.interpolate.interp2d',
297
+ ]
298
+ for name in deprecated:
299
+ known_warnings[name] = dict(category=DeprecationWarning)
300
+
301
+ from scipy import integrate
302
+ # these functions are known to emit IntegrationWarnings
303
+ integration_w = ['scipy.special.ellip_normal',
304
+ 'scipy.special.ellip_harm_2',
305
+ ]
306
+ for name in integration_w:
307
+ known_warnings[name] = dict(category=integrate.IntegrationWarning,
308
+ message='The occurrence of roundoff')
309
+
310
+ # scipy.stats deliberately emits UserWarnings sometimes
311
+ user_w = ['scipy.stats.anderson_ksamp', 'scipy.stats.kurtosistest',
312
+ 'scipy.stats.normaltest', 'scipy.sparse.linalg.norm']
313
+ for name in user_w:
314
+ known_warnings[name] = dict(category=UserWarning)
315
+
316
+ # additional one-off warnings to filter
317
+ dct = {
318
+ 'scipy.sparse.linalg.norm':
319
+ dict(category=UserWarning, message="Exited at iteration"),
320
+ # tutorials
321
+ 'linalg.rst':
322
+ dict(message='the matrix subclass is not',
323
+ category=PendingDeprecationWarning),
324
+ 'stats.rst':
325
+ dict(message='The maximum number of subdivisions',
326
+ category=integrate.IntegrationWarning),
327
+ }
328
+ known_warnings.update(dct)
329
+
330
+ # these legitimately emit warnings in examples
331
+ legit = {'scipy.signal.normalize'}  # a one-element set, not a set of characters
332
+
333
+ # Now, the meat of the matter: filter warnings,
334
+ # also control the random seed for each doctest.
335
+
336
+ # XXX: this matches the refguide-check behavior, but is a tad strange:
337
+ # makes sure that the seed for the old-fashioned np.random* methods is
338
+ # *NOT* reproducible, while the new-style `default_rng()` *IS* reproducible.
339
+ # Should these two be either both repro or both not repro?
340
+
341
+ from scipy._lib._util import _fixed_default_rng
342
+ import numpy as np
343
+ with _fixed_default_rng():
344
+ np.random.seed(None)
345
+ with warnings.catch_warnings():
346
+ if test and test.name in known_warnings:
347
+ warnings.filterwarnings('ignore',
348
+ **known_warnings[test.name])
349
+ yield
350
+ elif test and test.name in legit:
351
+ yield
352
+ else:
353
+ warnings.simplefilter('error', Warning)
354
+ yield
355
+
356
+
357
+ dt_config.user_context_mgr = warnings_errors_and_rng
358
+ dt_config.skiplist = set([
359
+ 'scipy.linalg.LinAlgError', # comes from numpy
360
+ 'scipy.fftpack.fftshift', # fftpack stuff is also from numpy
361
+ 'scipy.fftpack.ifftshift',
362
+ 'scipy.fftpack.fftfreq',
363
+ 'scipy.special.sinc', # sinc is from numpy
364
+ 'scipy.optimize.show_options', # does not have much to doctest
365
+ 'scipy.signal.normalize', # manipulates warnings (XXX temp skip)
366
+ 'scipy.sparse.linalg.norm', # XXX temp skip
367
+ ])
368
+
369
+ # these are affected by NumPy 2.0 scalar repr: rely on string comparison
370
+ if np.__version__ < "2":
371
+ dt_config.skiplist.update(set([
372
+ 'scipy.io.hb_read',
373
+ 'scipy.io.hb_write',
374
+ 'scipy.sparse.csgraph.connected_components',
375
+ 'scipy.sparse.csgraph.depth_first_order',
376
+ 'scipy.sparse.csgraph.shortest_path',
377
+ 'scipy.sparse.csgraph.floyd_warshall',
378
+ 'scipy.sparse.csgraph.dijkstra',
379
+ 'scipy.sparse.csgraph.bellman_ford',
380
+ 'scipy.sparse.csgraph.johnson',
381
+ 'scipy.sparse.csgraph.yen',
382
+ 'scipy.sparse.csgraph.breadth_first_order',
383
+ 'scipy.sparse.csgraph.reverse_cuthill_mckee',
384
+ 'scipy.sparse.csgraph.structural_rank',
385
+ 'scipy.sparse.csgraph.construct_dist_matrix',
386
+ 'scipy.sparse.csgraph.reconstruct_path',
387
+ 'scipy.ndimage.value_indices',
388
+ 'scipy.stats.mstats.describe',
389
+ ]))
390
+
391
+ # help pytest collection a bit: these names are either private
392
+ # (distributions), or just do not need doctesting.
393
+ dt_config.pytest_extra_ignore = [
394
+ "scipy.stats.distributions",
395
+ "scipy.optimize.cython_optimize",
396
+ "scipy.test",
397
+ "scipy.show_config",
398
+ ]
399
+
400
+ dt_config.pytest_extra_xfail = {
401
+ # name: reason
402
+ "io.rst": "",
403
+ "ND_regular_grid.rst": "ReST parser limitation",
404
+ "extrapolation_examples.rst": "ReST parser limitation",
405
+ "sampling_pinv.rst": "__cinit__ unexpected argument",
406
+ "sampling_srou.rst": "nan in scalar_power",
407
+ "probability_distributions.rst": "integration warning",
408
+ }
409
+
410
+ # tutorials
411
+ dt_config.pseudocode = set(['integrate.nquad(func,'])
412
+ dt_config.local_resources = {'io.rst': ["octave_a.mat"]}
413
+ ############################################################################
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/linalg.pxd ADDED
@@ -0,0 +1 @@
1
+ from scipy.linalg cimport cython_blas, cython_lapack
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize.pxd ADDED
@@ -0,0 +1 @@
1
+ from .optimize cimport cython_optimize
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/README ADDED
@@ -0,0 +1,76 @@
1
+ From the website for the L-BFGS-B code (at
2
+ http://www.ece.northwestern.edu/~nocedal/lbfgsb.html):
3
+
4
+ """
5
+ L-BFGS-B is a limited-memory quasi-Newton code for bound-constrained
6
+ optimization, i.e. for problems where the only constraints are of the
7
+ form l<= x <= u.
8
+ """
9
+
10
+ This is a Python wrapper (using F2PY) written by David M. Cooke
11
+ <[email protected]> and released as version 0.9 on April 9, 2004.
12
+ The wrapper was slightly modified by Joonas Paalasmaa for the 3.0 version
13
+ in March 2012.
14
+
15
+ License of L-BFGS-B (Fortran code)
16
+ ==================================
17
+
18
+ The version included here (in lbfgsb.f) is 3.0 (released April 25, 2011). It was
19
+ written by Ciyou Zhu, Richard Byrd, and Jorge Nocedal <[email protected]>. It
20
+ carries the following condition for use:
21
+
22
+ """
23
+ This software is freely available, but we expect that all publications
24
+ describing work using this software, or all commercial products using it,
25
+ quote at least one of the references given below. This software is released
26
+ under the BSD License.
27
+
28
+ References
29
+ * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
30
+ Constrained Optimization, (1995), SIAM Journal on Scientific and
31
+ Statistical Computing, 16, 5, pp. 1190-1208.
32
+ * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
33
+ FORTRAN routines for large scale bound constrained optimization (1997),
34
+ ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
35
+ * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
36
+ FORTRAN routines for large scale bound constrained optimization (2011),
37
+ ACM Transactions on Mathematical Software, 38, 1.
38
+ """
39
+
40
+ The Python wrapper
41
+ ==================
42
+
43
+ This code uses F2PY (http://cens.ioc.ee/projects/f2py2e/) to generate
44
+ the wrapper around the Fortran code.
45
+
46
+ The Python code and wrapper are copyrighted 2004 by David M. Cooke
47
+ <[email protected]>.
48
+
49
+ Example usage
50
+ =============
51
+
52
+ An example of the usage is given at the bottom of the lbfgsb.py file.
53
+ Run it with 'python lbfgsb.py'.
54
+
55
+ License for the Python wrapper
56
+ ==============================
57
+
58
+ Copyright (c) 2004 David M. Cooke <[email protected]>
59
+
60
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
61
+ this software and associated documentation files (the "Software"), to deal in
62
+ the Software without restriction, including without limitation the rights to
63
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
64
+ of the Software, and to permit persons to whom the Software is furnished to do
65
+ so, subject to the following conditions:
66
+
67
+ The above copyright notice and this permission notice shall be included in all
68
+ copies or substantial portions of the Software.
69
+
70
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
71
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
72
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
73
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
74
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
75
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
76
+ SOFTWARE.
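The README defers to an example at the bottom of ``lbfgsb.py``. For orientation, here is a hedged sketch of the same routine through its modern entry point, ``scipy.optimize.fmin_l_bfgs_b``; the objective, bounds, and starting point are invented for the example:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f_and_grad(x):
    # Convex quadratic: f(x) = (x0 - 1)^2 + (x1 + 2)^2, returned with its gradient.
    f = (x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2
    g = np.array([2.0 * (x[0] - 1.0), 2.0 * (x[1] + 2.0)])
    return f, g

# Bound constraints of the form l <= x <= u, as in the quoted blurb above.
bounds = [(0.0, 5.0), (0.0, 5.0)]
x_opt, f_opt, info = fmin_l_bfgs_b(f_and_grad, np.array([3.0, 3.0]), bounds=bounds)
print(x_opt, f_opt)  # expect x near [1.0, 0.0]: the unconstrained minimum projected onto the bounds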
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__init__.py ADDED
@@ -0,0 +1,452 @@
1
+ """
2
+ =====================================================
3
+ Optimization and root finding (:mod:`scipy.optimize`)
4
+ =====================================================
5
+
6
+ .. currentmodule:: scipy.optimize
7
+
8
+ .. toctree::
9
+ :hidden:
10
+
11
+ optimize.cython_optimize
12
+
13
+ SciPy ``optimize`` provides functions for minimizing (or maximizing)
14
+ objective functions, possibly subject to constraints. It includes
15
+ solvers for nonlinear problems (with support for both local and global
16
+ optimization algorithms), linear programming, constrained
17
+ and nonlinear least-squares, root finding, and curve fitting.
18
+
19
+ Common functions and objects, shared across different solvers, are:
20
+
21
+ .. autosummary::
22
+ :toctree: generated/
23
+
24
+ show_options - Show specific options for optimization solvers.
25
+ OptimizeResult - The optimization result returned by some optimizers.
26
+ OptimizeWarning - The optimization encountered problems.
27
+
28
+
29
+ Optimization
30
+ ============
31
+
32
+ Scalar functions optimization
33
+ -----------------------------
34
+
35
+ .. autosummary::
36
+ :toctree: generated/
37
+
38
+ minimize_scalar - Interface for minimizers of univariate functions
39
+
40
+ The `minimize_scalar` function supports the following methods:
41
+
42
+ .. toctree::
43
+
44
+ optimize.minimize_scalar-brent
45
+ optimize.minimize_scalar-bounded
46
+ optimize.minimize_scalar-golden
47
+
48
+ Local (multivariate) optimization
49
+ ---------------------------------
50
+
51
+ .. autosummary::
52
+ :toctree: generated/
53
+
54
+ minimize - Interface for minimizers of multivariate functions.
55
+
56
+ The `minimize` function supports the following methods:
57
+
58
+ .. toctree::
59
+
60
+ optimize.minimize-neldermead
61
+ optimize.minimize-powell
62
+ optimize.minimize-cg
63
+ optimize.minimize-bfgs
64
+ optimize.minimize-newtoncg
65
+ optimize.minimize-lbfgsb
66
+ optimize.minimize-tnc
67
+ optimize.minimize-cobyla
68
+ optimize.minimize-cobyqa
69
+ optimize.minimize-slsqp
70
+ optimize.minimize-trustconstr
71
+ optimize.minimize-dogleg
72
+ optimize.minimize-trustncg
73
+ optimize.minimize-trustkrylov
74
+ optimize.minimize-trustexact
75
+
76
+ Constraints are passed to `minimize` function as a single object or
77
+ as a list of objects from the following classes:
78
+
79
+ .. autosummary::
80
+ :toctree: generated/
81
+
82
+ NonlinearConstraint - Class defining general nonlinear constraints.
83
+ LinearConstraint - Class defining general linear constraints.
84
+
85
+ Simple bound constraints are handled separately and there is a special class
86
+ for them:
87
+
88
+ .. autosummary::
89
+ :toctree: generated/
90
+
91
+ Bounds - Bound constraints.
92
+
93
+ Quasi-Newton strategies implementing `HessianUpdateStrategy`
94
+ interface can be used to approximate the Hessian in `minimize`
95
+ function (available only for the 'trust-constr' method). Available
96
+ quasi-Newton methods implementing this interface are:
97
+
98
+ .. autosummary::
99
+ :toctree: generated/
100
+
101
+ BFGS - Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
102
+ SR1 - Symmetric-rank-1 Hessian update strategy.
103
+
104
+ .. _global_optimization:
105
+
106
+ Global optimization
107
+ -------------------
108
+
109
+ .. autosummary::
110
+ :toctree: generated/
111
+
112
+ basinhopping - Basinhopping stochastic optimizer.
113
+ brute - Brute force searching optimizer.
114
+ differential_evolution - Stochastic optimizer using differential evolution.
115
+
116
+ shgo - Simplicial homology global optimizer.
117
+ dual_annealing - Dual annealing stochastic optimizer.
118
+ direct - DIRECT (Dividing Rectangles) optimizer.
119
+
120
+ Least-squares and curve fitting
121
+ ===============================
122
+
123
+ Nonlinear least-squares
124
+ -----------------------
125
+
126
+ .. autosummary::
127
+ :toctree: generated/
128
+
129
+ least_squares - Solve a nonlinear least-squares problem with bounds on the variables.
130
+
131
+ Linear least-squares
132
+ --------------------
133
+
134
+ .. autosummary::
135
+ :toctree: generated/
136
+
137
+ nnls - Linear least-squares problem with non-negativity constraint.
138
+ lsq_linear - Linear least-squares problem with bound constraints.
139
+ isotonic_regression - Least squares problem of isotonic regression via PAVA.
140
+
141
+ Curve fitting
142
+ -------------
143
+
144
+ .. autosummary::
145
+ :toctree: generated/
146
+
147
+ curve_fit -- Fit curve to a set of points.
148
+
149
+ Root finding
150
+ ============
151
+
152
+ Scalar functions
153
+ ----------------
154
+ .. autosummary::
155
+ :toctree: generated/
156
+
157
+ root_scalar - Unified interface for nonlinear solvers of scalar functions.
158
+ brentq - quadratic interpolation Brent method.
159
+ brenth - Brent method, modified by Harris with hyperbolic extrapolation.
160
+ ridder - Ridder's method.
161
+ bisect - Bisection method.
162
+ newton - Newton's method (also Secant and Halley's methods).
163
+ toms748 - Alefeld, Potra & Shi Algorithm 748.
164
+ RootResults - The root finding result returned by some root finders.
165
+
166
+ The `root_scalar` function supports the following methods:
167
+
168
+ .. toctree::
169
+
170
+ optimize.root_scalar-brentq
171
+ optimize.root_scalar-brenth
172
+ optimize.root_scalar-bisect
173
+ optimize.root_scalar-ridder
174
+ optimize.root_scalar-newton
175
+ optimize.root_scalar-toms748
176
+ optimize.root_scalar-secant
177
+ optimize.root_scalar-halley
178
+
179
+
180
+
181
+ The table below lists situations and appropriate methods, along with
182
+ *asymptotic* convergence rates per iteration (and per function evaluation)
183
+ for successful convergence to a simple root(*).
184
+ Bisection is the slowest of them all, adding one bit of accuracy for each
185
+ function evaluation, but is guaranteed to converge.
186
+ The other bracketing methods all (eventually) increase the number of accurate
187
+ bits by about 50% for every function evaluation.
188
+ The derivative-based methods, all built on `newton`, can converge quite quickly
189
+ if the initial value is close to the root. They can also be applied to
190
+ functions defined on (a subset of) the complex plane.
191
+
192
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
193
+ | Domain of f | Bracket? | Derivatives? | Solvers | Convergence |
194
+ + + +----------+-----------+ +-------------+----------------+
195
+ | | | `fprime` | `fprime2` | | Guaranteed? | Rate(s)(*) |
196
+ +=============+==========+==========+===========+=============+=============+================+
197
+ | `R` | Yes | N/A | N/A | - bisection | - Yes | - 1 "Linear" |
198
+ | | | | | - brentq | - Yes | - >=1, <= 1.62 |
199
+ | | | | | - brenth | - Yes | - >=1, <= 1.62 |
200
+ | | | | | - ridder | - Yes | - 2.0 (1.41) |
201
+ | | | | | - toms748 | - Yes | - 2.7 (1.65) |
202
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
203
+ | `R` or `C` | No | No | No | secant | No | 1.62 (1.62) |
204
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
205
+ | `R` or `C` | No | Yes | No | newton | No | 2.00 (1.41) |
206
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
207
+ | `R` or `C` | No | Yes | Yes | halley | No | 3.00 (1.44) |
208
+ +-------------+----------+----------+-----------+-------------+-------------+----------------+
209
+
210
+ .. seealso::
211
+
212
+ `scipy.optimize.cython_optimize` -- Typed Cython versions of root finding functions
213
+
214
+ Fixed point finding:
215
+
216
+ .. autosummary::
217
+ :toctree: generated/
218
+
219
+ fixed_point - Single-variable fixed-point solver.
220
+
221
+ Multidimensional
222
+ ----------------
223
+
224
+ .. autosummary::
225
+ :toctree: generated/
226
+
227
+ root - Unified interface for nonlinear solvers of multivariate functions.
228
+
229
+ The `root` function supports the following methods:
230
+
231
+ .. toctree::
232
+
233
+ optimize.root-hybr
234
+ optimize.root-lm
235
+ optimize.root-broyden1
236
+ optimize.root-broyden2
237
+ optimize.root-anderson
238
+ optimize.root-linearmixing
239
+ optimize.root-diagbroyden
240
+ optimize.root-excitingmixing
241
+ optimize.root-krylov
242
+ optimize.root-dfsane
243
+
244
+ Linear programming / MILP
245
+ =========================
246
+
247
+ .. autosummary::
248
+ :toctree: generated/
249
+
250
+ milp -- Mixed integer linear programming.
251
+ linprog -- Unified interface for minimizers of linear programming problems.
252
+
253
+ The `linprog` function supports the following methods:
254
+
255
+ .. toctree::
256
+
257
+ optimize.linprog-simplex
258
+ optimize.linprog-interior-point
259
+ optimize.linprog-revised_simplex
260
+ optimize.linprog-highs-ipm
261
+ optimize.linprog-highs-ds
262
+ optimize.linprog-highs
263
+
264
+ The simplex, interior-point, and revised simplex methods support callback
265
+ functions, such as:
266
+
267
+ .. autosummary::
268
+ :toctree: generated/
269
+
270
+ linprog_verbose_callback -- Sample callback function for linprog (simplex).
271
+
272
+ Assignment problems
273
+ ===================
274
+
275
+ .. autosummary::
276
+ :toctree: generated/
277
+
278
+ linear_sum_assignment -- Solves the linear-sum assignment problem.
279
+ quadratic_assignment -- Solves the quadratic assignment problem.
280
+
281
+ The `quadratic_assignment` function supports the following methods:
282
+
283
+ .. toctree::
284
+
285
+ optimize.qap-faq
286
+ optimize.qap-2opt
287
+
288
+ Utilities
289
+ =========
290
+
291
+ Finite-difference approximation
292
+ -------------------------------
293
+
294
+ .. autosummary::
295
+ :toctree: generated/
296
+
297
+ approx_fprime - Approximate the gradient of a scalar function.
298
+ check_grad - Check the supplied derivative using finite differences.
299
+
300
+
301
+ Line search
302
+ -----------
303
+
304
+ .. autosummary::
305
+ :toctree: generated/
306
+
307
+ bracket - Bracket a minimum, given two starting points.
308
+ line_search - Return a step that satisfies the strong Wolfe conditions.
309
+
310
+ Hessian approximation
311
+ ---------------------
312
+
313
+ .. autosummary::
314
+ :toctree: generated/
315
+
316
+ LbfgsInvHessProduct - Linear operator for L-BFGS approximate inverse Hessian.
317
+ HessianUpdateStrategy - Interface for implementing Hessian update strategies
318
+
319
+ Benchmark problems
320
+ ------------------
321
+
322
+ .. autosummary::
323
+ :toctree: generated/
324
+
325
+ rosen - The Rosenbrock function.
326
+ rosen_der - The derivative of the Rosenbrock function.
327
+ rosen_hess - The Hessian matrix of the Rosenbrock function.
328
+ rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
329
+
330
+ Legacy functions
331
+ ================
332
+
333
+ The functions below are not recommended for use in new scripts;
334
+ all of these methods are accessible via a newer, more consistent
335
+ interface provided by the functions above.
336
+
337
+ Optimization
338
+ ------------
339
+
340
+ General-purpose multivariate methods:
341
+
342
+ .. autosummary::
343
+ :toctree: generated/
344
+
345
+ fmin - Nelder-Mead Simplex algorithm.
346
+ fmin_powell - Powell's (modified) conjugate direction method.
347
+ fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm.
348
+ fmin_bfgs - Quasi-Newton method (Broyden-Fletcher-Goldfarb-Shanno).
349
+ fmin_ncg - Line-search Newton Conjugate Gradient.
350
+
351
+ Constrained multivariate methods:
352
+
353
+ .. autosummary::
354
+ :toctree: generated/
355
+
356
+ fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer.
357
+ fmin_tnc - Truncated Newton code.
358
+ fmin_cobyla - Constrained optimization by linear approximation.
359
+ fmin_slsqp - Minimization using sequential least-squares programming.
360
+
361
+ Univariate (scalar) minimization methods:
362
+
363
+ .. autosummary::
364
+ :toctree: generated/
365
+
366
+ fminbound - Bounded minimization of a scalar function.
367
+ brent - 1-D function minimization using Brent method.
368
+ golden - 1-D function minimization using Golden Section method.
369
+
370
+ Least-squares
371
+ -------------
372
+
373
+ .. autosummary::
374
+ :toctree: generated/
375
+
376
+ leastsq - Minimize the sum of squares of M equations in N unknowns.
377
+
378
+ Root finding
379
+ ------------
380
+
381
+ General nonlinear solvers:
382
+
383
+ .. autosummary::
384
+ :toctree: generated/
385
+
386
+ fsolve - Non-linear multivariable equation solver.
387
+ broyden1 - Broyden's first method.
388
+ broyden2 - Broyden's second method.
389
+ NoConvergence - Exception raised when nonlinear solver does not converge.
390
+
391
+ Large-scale nonlinear solvers:
392
+
393
+ .. autosummary::
394
+ :toctree: generated/
395
+
396
+ newton_krylov
397
+ anderson
398
+
399
+ BroydenFirst
400
+ InverseJacobian
401
+ KrylovJacobian
402
+
403
+ Simple iteration solvers:
404
+
405
+ .. autosummary::
406
+ :toctree: generated/
407
+
408
+ excitingmixing
409
+ linearmixing
410
+ diagbroyden
411
+
412
+ """ # noqa: E501
413
+
414
+ from ._optimize import *
415
+ from ._minimize import *
416
+ from ._root import *
417
+ from ._root_scalar import *
418
+ from ._minpack_py import *
419
+ from ._zeros_py import *
420
+ from ._lbfgsb_py import fmin_l_bfgs_b, LbfgsInvHessProduct
421
+ from ._tnc import fmin_tnc
422
+ from ._cobyla_py import fmin_cobyla
423
+ from ._nonlin import *
424
+ from ._slsqp_py import fmin_slsqp
425
+ from ._nnls import nnls
426
+ from ._basinhopping import basinhopping
427
+ from ._linprog import linprog, linprog_verbose_callback
428
+ from ._lsap import linear_sum_assignment
429
+ from ._differentialevolution import differential_evolution
430
+ from ._lsq import least_squares, lsq_linear
431
+ from ._isotonic import isotonic_regression
432
+ from ._constraints import (NonlinearConstraint,
433
+ LinearConstraint,
434
+ Bounds)
435
+ from ._hessian_update_strategy import HessianUpdateStrategy, BFGS, SR1
436
+ from ._shgo import shgo
437
+ from ._dual_annealing import dual_annealing
438
+ from ._qap import quadratic_assignment
439
+ from ._direct_py import direct
440
+ from ._milp import milp
441
+
442
+ # Deprecated namespaces, to be removed in v2.0.0
443
+ from . import (
444
+ cobyla, lbfgsb, linesearch, minpack, minpack2, moduleTNC, nonlin, optimize,
445
+ slsqp, tnc, zeros
446
+ )
447
+
448
+ __all__ = [s for s in dir() if not s.startswith('_')]
449
+
450
+ from scipy._lib._testutils import PytestTester
451
+ test = PytestTester(__name__)
452
+ del PytestTester
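As a quick illustration of the unified interface documented in the module docstring, here is a hedged sketch minimizing the bundled Rosenbrock test function under box constraints; the starting point and method choice are arbitrary:

import numpy as np
from scipy.optimize import Bounds, minimize, rosen, rosen_der

x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])   # arbitrary starting point
bounds = Bounds(lb=0.0, ub=2.0)             # scalar bounds are broadcast to every variable

res = minimize(rosen, x0, jac=rosen_der, method="L-BFGS-B", bounds=bounds)
print(res.x)    # approaches the global minimum at all-ones
print(res.fun)  # objective value at the solution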
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_basinhopping.py ADDED
@@ -0,0 +1,753 @@
1
+ """
2
+ basinhopping: The basinhopping global optimization algorithm
3
+ """
4
+ import numpy as np
5
+ import math
6
+ import inspect
7
+ import scipy.optimize
8
+ from scipy._lib._util import check_random_state
9
+
10
+ __all__ = ['basinhopping']
11
+
12
+
13
+ _params = (inspect.Parameter('res_new', kind=inspect.Parameter.KEYWORD_ONLY),
14
+ inspect.Parameter('res_old', kind=inspect.Parameter.KEYWORD_ONLY))
15
+ _new_accept_test_signature = inspect.Signature(parameters=_params)
16
+
17
+
18
+ class Storage:
19
+ """
20
+ Class used to store the lowest energy structure
21
+ """
22
+ def __init__(self, minres):
23
+ self._add(minres)
24
+
25
+ def _add(self, minres):
26
+ self.minres = minres
27
+ self.minres.x = np.copy(minres.x)
28
+
29
+ def update(self, minres):
30
+ if minres.success and (minres.fun < self.minres.fun
31
+ or not self.minres.success):
32
+ self._add(minres)
33
+ return True
34
+ else:
35
+ return False
36
+
37
+ def get_lowest(self):
38
+ return self.minres
39
+
40
+
41
+ class BasinHoppingRunner:
42
+ """This class implements the core of the basinhopping algorithm.
43
+
44
+ x0 : ndarray
45
+ The starting coordinates.
46
+ minimizer : callable
47
+ The local minimizer, with signature ``result = minimizer(x)``.
48
+ The return value is an `optimize.OptimizeResult` object.
49
+ step_taking : callable
50
+ This function displaces the coordinates randomly. Signature should
51
+ be ``x_new = step_taking(x)``. Note that `x` may be modified in-place.
52
+ accept_tests : list of callables
53
+ Each test is passed the kwargs `f_new`, `x_new`, `f_old` and
54
+ `x_old`. These tests will be used to judge whether or not to accept
55
+ the step. The acceptable return values are True, False, or ``"force
56
+ accept"``. If any of the tests return False then the step is rejected.
57
+ If ``"force accept"``, then this will override any other tests in
58
+ order to accept the step. This can be used, for example, to forcefully
59
+ escape from a local minimum that ``basinhopping`` is trapped in.
60
+ disp : bool, optional
61
+ Display status messages.
62
+
63
+ """
64
+ def __init__(self, x0, minimizer, step_taking, accept_tests, disp=False):
65
+ self.x = np.copy(x0)
66
+ self.minimizer = minimizer
67
+ self.step_taking = step_taking
68
+ self.accept_tests = accept_tests
69
+ self.disp = disp
70
+
71
+ self.nstep = 0
72
+
73
+ # initialize return object
74
+ self.res = scipy.optimize.OptimizeResult()
75
+ self.res.minimization_failures = 0
76
+
77
+ # do initial minimization
78
+ minres = minimizer(self.x)
79
+ if not minres.success:
80
+ self.res.minimization_failures += 1
81
+ if self.disp:
82
+ print("warning: basinhopping: local minimization failure")
83
+ self.x = np.copy(minres.x)
84
+ self.energy = minres.fun
85
+ self.incumbent_minres = minres # best minimize result found so far
86
+ if self.disp:
87
+ print("basinhopping step %d: f %g" % (self.nstep, self.energy))
88
+
89
+ # initialize storage class
90
+ self.storage = Storage(minres)
91
+
92
+ if hasattr(minres, "nfev"):
93
+ self.res.nfev = minres.nfev
94
+ if hasattr(minres, "njev"):
95
+ self.res.njev = minres.njev
96
+ if hasattr(minres, "nhev"):
97
+ self.res.nhev = minres.nhev
98
+
99
+ def _monte_carlo_step(self):
100
+ """Do one Monte Carlo iteration
101
+
102
+ Randomly displace the coordinates, minimize, and decide whether
103
+ or not to accept the new coordinates.
104
+ """
105
+ # Take a random step. Make a copy of x because the step_taking
106
+ # algorithm might change x in place
107
+ x_after_step = np.copy(self.x)
108
+ x_after_step = self.step_taking(x_after_step)
109
+
110
+ # do a local minimization
111
+ minres = self.minimizer(x_after_step)
112
+ x_after_quench = minres.x
113
+ energy_after_quench = minres.fun
114
+ if not minres.success:
115
+ self.res.minimization_failures += 1
116
+ if self.disp:
117
+ print("warning: basinhopping: local minimization failure")
118
+ if hasattr(minres, "nfev"):
119
+ self.res.nfev += minres.nfev
120
+ if hasattr(minres, "njev"):
121
+ self.res.njev += minres.njev
122
+ if hasattr(minres, "nhev"):
123
+ self.res.nhev += minres.nhev
124
+
125
+ # accept the move based on self.accept_tests. If any test is False,
126
+ # then reject the step. If any test returns the special string
127
+ # 'force accept', then accept the step regardless. This can be used
128
+ # to forcefully escape from a local minimum if normal basin hopping
129
+ # steps are not sufficient.
130
+ accept = True
131
+ for test in self.accept_tests:
132
+ if inspect.signature(test) == _new_accept_test_signature:
133
+ testres = test(res_new=minres, res_old=self.incumbent_minres)
134
+ else:
135
+ testres = test(f_new=energy_after_quench, x_new=x_after_quench,
136
+ f_old=self.energy, x_old=self.x)
137
+
138
+ if testres == 'force accept':
139
+ accept = True
140
+ break
141
+ elif testres is None:
142
+ raise ValueError("accept_tests must return True, False, or "
143
+ "'force accept'")
144
+ elif not testres:
145
+ accept = False
146
+
147
+ # Report the result of the acceptance test to the take step class.
148
+ # This is for adaptive step taking
149
+ if hasattr(self.step_taking, "report"):
150
+ self.step_taking.report(accept, f_new=energy_after_quench,
151
+ x_new=x_after_quench, f_old=self.energy,
152
+ x_old=self.x)
153
+
154
+ return accept, minres
155
+
156
+ def one_cycle(self):
157
+ """Do one cycle of the basinhopping algorithm
158
+ """
159
+ self.nstep += 1
160
+ new_global_min = False
161
+
162
+ accept, minres = self._monte_carlo_step()
163
+
164
+ if accept:
165
+ self.energy = minres.fun
166
+ self.x = np.copy(minres.x)
167
+ self.incumbent_minres = minres # best minimize result found so far
168
+ new_global_min = self.storage.update(minres)
169
+
170
+ # print some information
171
+ if self.disp:
172
+ self.print_report(minres.fun, accept)
173
+ if new_global_min:
174
+ print("found new global minimum on step %d with function"
175
+ " value %g" % (self.nstep, self.energy))
176
+
177
+ # save some variables as BasinHoppingRunner attributes
178
+ self.xtrial = minres.x
179
+ self.energy_trial = minres.fun
180
+ self.accept = accept
181
+
182
+ return new_global_min
183
+
184
+ def print_report(self, energy_trial, accept):
185
+ """print a status update"""
186
+ minres = self.storage.get_lowest()
187
+ print("basinhopping step %d: f %g trial_f %g accepted %d "
188
+ " lowest_f %g" % (self.nstep, self.energy, energy_trial,
189
+ accept, minres.fun))
190
+
191
+
192
+ class AdaptiveStepsize:
193
+ """
194
+ Class to implement adaptive stepsize.
195
+
196
+ This class wraps the step taking class and modifies the stepsize to
197
+ ensure the true acceptance rate is as close as possible to the target.
198
+
199
+ Parameters
200
+ ----------
201
+ takestep : callable
202
+ The step taking routine. Must contain modifiable attribute
203
+ takestep.stepsize
204
+ accept_rate : float, optional
205
+ The target step acceptance rate
206
+ interval : int, optional
207
+ Interval for how often to update the stepsize
208
+ factor : float, optional
209
+ The step size is multiplied or divided by this factor upon each
210
+ update.
211
+ verbose : bool, optional
212
+ Print information about each update
213
+
214
+ """
215
+ def __init__(self, takestep, accept_rate=0.5, interval=50, factor=0.9,
216
+ verbose=True):
217
+ self.takestep = takestep
218
+ self.target_accept_rate = accept_rate
219
+ self.interval = interval
220
+ self.factor = factor
221
+ self.verbose = verbose
222
+
223
+ self.nstep = 0
224
+ self.nstep_tot = 0
225
+ self.naccept = 0
226
+
227
+ def __call__(self, x):
228
+ return self.take_step(x)
229
+
230
+ def _adjust_step_size(self):
231
+ old_stepsize = self.takestep.stepsize
232
+ accept_rate = float(self.naccept) / self.nstep
233
+ if accept_rate > self.target_accept_rate:
234
+ # We're accepting too many steps. This generally means we're
235
+ # trapped in a basin. Take bigger steps.
236
+ self.takestep.stepsize /= self.factor
237
+ else:
238
+ # We're not accepting enough steps. Take smaller steps.
239
+ self.takestep.stepsize *= self.factor
240
+ if self.verbose:
241
+ print(f"adaptive stepsize: acceptance rate {accept_rate:f} target "
242
+ f"{self.target_accept_rate:f} new stepsize "
243
+ f"{self.takestep.stepsize:g} old stepsize {old_stepsize:g}")
244
+
245
+ def take_step(self, x):
246
+ self.nstep += 1
247
+ self.nstep_tot += 1
248
+ if self.nstep % self.interval == 0:
249
+ self._adjust_step_size()
250
+ return self.takestep(x)
251
+
252
+ def report(self, accept, **kwargs):
253
+ "called by basinhopping to report the result of the step"
254
+ if accept:
255
+ self.naccept += 1
256
+
257
+
258
+ class RandomDisplacement:
259
+ """Add a random displacement of maximum size `stepsize` to each coordinate.
260
+
261
+ Calling this updates `x` in-place.
262
+
263
+ Parameters
264
+ ----------
265
+ stepsize : float, optional
266
+ Maximum stepsize in any dimension
267
+ random_gen : {None, int, `numpy.random.Generator`,
268
+ `numpy.random.RandomState`}, optional
269
+
270
+ If `random_gen` is None (or `np.random`), the `numpy.random.RandomState`
271
+ singleton is used.
272
+ If `random_gen` is an int, a new ``RandomState`` instance is used,
273
+ seeded with `random_gen`.
274
+ If `random_gen` is already a ``Generator`` or ``RandomState`` instance
275
+ then that instance is used.
276
+
277
+ """
278
+
279
+ def __init__(self, stepsize=0.5, random_gen=None):
280
+ self.stepsize = stepsize
281
+ self.random_gen = check_random_state(random_gen)
282
+
283
+ def __call__(self, x):
284
+ x += self.random_gen.uniform(-self.stepsize, self.stepsize,
285
+ np.shape(x))
286
+ return x
287
+
288
+
289
+ class MinimizerWrapper:
290
+ """
291
+ wrap a minimizer function as a minimizer class
292
+ """
293
+ def __init__(self, minimizer, func=None, **kwargs):
294
+ self.minimizer = minimizer
295
+ self.func = func
296
+ self.kwargs = kwargs
297
+
298
+ def __call__(self, x0):
299
+ if self.func is None:
300
+ return self.minimizer(x0, **self.kwargs)
301
+ else:
302
+ return self.minimizer(self.func, x0, **self.kwargs)
303
+
304
+
305
+ class Metropolis:
306
+ """Metropolis acceptance criterion.
307
+
308
+ Parameters
309
+ ----------
310
+ T : float
311
+ The "temperature" parameter for the accept or reject criterion.
312
+ random_gen : {None, int, `numpy.random.Generator`,
313
+ `numpy.random.RandomState`}, optional
314
+
315
+ If `random_gen` is None (or `np.random`), the `numpy.random.RandomState`
316
+ singleton is used.
317
+ If `random_gen` is an int, a new ``RandomState`` instance is used,
318
+ seeded with `random_gen`.
319
+ If `random_gen` is already a ``Generator`` or ``RandomState`` instance
320
+ then that instance is used.
321
+ Random number generator used for acceptance test.
322
+
323
+ """
324
+
325
+ def __init__(self, T, random_gen=None):
326
+ # Avoid ZeroDivisionError since "MBH can be regarded as a special case
327
+ # of the BH framework with the Metropolis criterion, where temperature
328
+ # T = 0." (Reject all steps that increase energy.)
329
+ self.beta = 1.0 / T if T != 0 else float('inf')
330
+ self.random_gen = check_random_state(random_gen)
331
+
332
+ def accept_reject(self, res_new, res_old):
333
+ """
334
+ Assuming the local search underlying res_new was successful:
335
+ If new energy is lower than old, it will always be accepted.
336
+ If new is higher than old, there is a chance it will be accepted,
337
+ less likely for larger differences.
338
+ """
339
+ with np.errstate(invalid='ignore'):
340
+ # The energy values being fed to Metropolis are 1-length arrays, and if
341
+ # they are equal, their difference is 0, which gets multiplied by beta,
342
+ # which is inf, and array([0]) * float('inf') causes
343
+ #
344
+ # RuntimeWarning: invalid value encountered in multiply
345
+ #
346
+ # Ignore this warning so when the algorithm is on a flat plane, it always
347
+ # accepts the step, to try to move off the plane.
348
+ prod = -(res_new.fun - res_old.fun) * self.beta
349
+ w = math.exp(min(0, prod))
350
+
351
+ rand = self.random_gen.uniform()
352
+ return w >= rand and (res_new.success or not res_old.success)
353
+
354
+ def __call__(self, *, res_new, res_old):
355
+ """
356
+ f_new and f_old are mandatory in kwargs
357
+ """
358
+ return bool(self.accept_reject(res_new, res_old))
359
+
360
+
361
+ def basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5,
362
+ minimizer_kwargs=None, take_step=None, accept_test=None,
363
+ callback=None, interval=50, disp=False, niter_success=None,
364
+ seed=None, *, target_accept_rate=0.5, stepwise_factor=0.9):
365
+ """Find the global minimum of a function using the basin-hopping algorithm.
366
+
367
+ Basin-hopping is a two-phase method that combines a global stepping
368
+ algorithm with local minimization at each step. Designed to mimic
369
+ the natural process of energy minimization of clusters of atoms, it works
370
+ well for similar problems with "funnel-like, but rugged" energy landscapes
371
+ [5]_.
372
+
373
+ As the step-taking, step acceptance, and minimization methods are all
374
+ customizable, this function can also be used to implement other two-phase
375
+ methods.
376
+
377
+ Parameters
378
+ ----------
379
+ func : callable ``f(x, *args)``
380
+ Function to be optimized. ``args`` can be passed as an optional item
381
+ in the dict `minimizer_kwargs`
382
+ x0 : array_like
383
+ Initial guess.
384
+ niter : integer, optional
385
+ The number of basin-hopping iterations. There will be a total of
386
+ ``niter + 1`` runs of the local minimizer.
387
+ T : float, optional
388
+ The "temperature" parameter for the acceptance or rejection criterion.
389
+ Higher "temperatures" mean that larger jumps in function value will be
390
+ accepted. For best results `T` should be comparable to the
391
+ separation (in function value) between local minima.
392
+ stepsize : float, optional
393
+ Maximum step size for use in the random displacement.
394
+ minimizer_kwargs : dict, optional
395
+ Extra keyword arguments to be passed to the local minimizer
396
+ `scipy.optimize.minimize` Some important options could be:
397
+
398
+ method : str
399
+ The minimization method (e.g. ``"L-BFGS-B"``)
400
+ args : tuple
401
+ Extra arguments passed to the objective function (`func`) and
402
+ its derivatives (Jacobian, Hessian).
403
+
404
+ take_step : callable ``take_step(x)``, optional
405
+ Replace the default step-taking routine with this routine. The default
406
+ step-taking routine is a random displacement of the coordinates, but
407
+ other step-taking algorithms may be better for some systems.
408
+ `take_step` can optionally have the attribute ``take_step.stepsize``.
409
+ If this attribute exists, then `basinhopping` will adjust
410
+ ``take_step.stepsize`` in order to try to optimize the global minimum
411
+ search.
412
+ accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=f_old, x_old=x_old)``, optional
413
+ Define a test which will be used to judge whether to accept the
414
+ step. This will be used in addition to the Metropolis test based on
415
+ "temperature" `T`. The acceptable return values are True,
416
+ False, or ``"force accept"``. If any of the tests return False
417
+ then the step is rejected. If any test returns ``"force accept"``, it overrides any
418
+ other tests in order to accept the step. This can be used, for example,
419
+ to forcefully escape from a local minimum that `basinhopping` is
420
+ trapped in.
421
+ callback : callable, ``callback(x, f, accept)``, optional
422
+ A callback function which will be called for all minima found. ``x``
423
+ and ``f`` are the coordinates and function value of the trial minimum,
424
+ and ``accept`` is whether that minimum was accepted. This can
425
+ be used, for example, to save the lowest N minima found. Also,
426
+ `callback` can be used to specify a user defined stop criterion by
427
+ optionally returning True to stop the `basinhopping` routine.
428
+ interval : integer, optional
429
+ interval for how often to update the `stepsize`
430
+ disp : bool, optional
431
+ Set to True to print status messages
432
+ niter_success : integer, optional
433
+ Stop the run if the global minimum candidate remains the same for this
434
+ number of iterations.
435
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
436
+
437
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
438
+ singleton is used.
439
+ If `seed` is an int, a new ``RandomState`` instance is used,
440
+ seeded with `seed`.
441
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
442
+ that instance is used.
443
+ Specify `seed` for repeatable minimizations. The random numbers
444
+ generated with this seed only affect the default Metropolis
445
+ `accept_test` and the default `take_step`. If you supply your own
446
+ `take_step` and `accept_test`, and these functions use random
447
+ number generation, then those functions are responsible for the state
448
+ of their random number generator.
449
+ target_accept_rate : float, optional
450
+ The target acceptance rate that is used to adjust the `stepsize`.
451
+ If the current acceptance rate is greater than the target,
452
+ then the `stepsize` is increased. Otherwise, it is decreased.
453
+ Range is (0, 1). Default is 0.5.
454
+
455
+ .. versionadded:: 1.8.0
456
+
457
+ stepwise_factor : float, optional
458
+ The `stepsize` is multiplied or divided by this stepwise factor upon
459
+ each update. Range is (0, 1). Default is 0.9.
460
+
461
+ .. versionadded:: 1.8.0
462
+
463
+ Returns
464
+ -------
465
+ res : OptimizeResult
466
+ The optimization result represented as a `OptimizeResult` object.
467
+ Important attributes are: ``x`` the solution array, ``fun`` the value
468
+ of the function at the solution, and ``message`` which describes the
469
+ cause of the termination. The ``OptimizeResult`` object returned by the
470
+ selected minimizer at the lowest minimum is also contained within this
471
+ object and can be accessed through the ``lowest_optimization_result``
472
+ attribute. See `OptimizeResult` for a description of other attributes.
473
+
474
+ See Also
475
+ --------
476
+ minimize :
477
+ The local minimization function called once for each basinhopping step.
478
+ `minimizer_kwargs` is passed to this routine.
479
+
480
+ Notes
481
+ -----
482
+ Basin-hopping is a stochastic algorithm which attempts to find the global
483
+ minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
484
+ [4]_. The algorithm in its current form was described by David Wales and
485
+ Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.
486
+
487
+ The algorithm is iterative with each cycle composed of the following
488
+ features
489
+
490
+ 1) random perturbation of the coordinates
491
+
492
+ 2) local minimization
493
+
494
+ 3) accept or reject the new coordinates based on the minimized function
495
+ value
496
+
497
+ The acceptance test used here is the Metropolis criterion of standard Monte
498
+ Carlo algorithms, although there are many other possibilities [3]_.
499
+
500
+ This global minimization method has been shown to be extremely efficient
501
+ for a wide variety of problems in physics and chemistry. It is
502
+ particularly useful when the function has many minima separated by large
503
+ barriers. See the `Cambridge Cluster Database
504
+ <https://www-wales.ch.cam.ac.uk/CCD.html>`_ for databases of molecular
505
+ systems that have been optimized primarily using basin-hopping. This
506
+ database includes minimization problems exceeding 300 degrees of freedom.
507
+
508
+ See the free software program `GMIN <https://www-wales.ch.cam.ac.uk/GMIN>`_
509
+ for a Fortran implementation of basin-hopping. This implementation has many
510
+ variations of the procedure described above, including more
511
+ advanced step taking algorithms and alternate acceptance criterion.
512
+
513
+ For stochastic global optimization there is no way to determine if the true
514
+ global minimum has actually been found. Instead, as a consistency check,
515
+ the algorithm can be run from a number of different random starting points
516
+ to ensure the lowest minimum found in each example has converged to the
517
+ global minimum. For this reason, `basinhopping` will by default simply
518
+ run for the number of iterations `niter` and return the lowest minimum
519
+ found. It is left to the user to ensure that this is in fact the global
520
+ minimum.
521
+
522
+ Choosing `stepsize`: This is a crucial parameter in `basinhopping` and
523
+ depends on the problem being solved. The step is chosen uniformly in the
524
+ region from x0-stepsize to x0+stepsize, in each dimension. Ideally, it
525
+ should be comparable to the typical separation (in argument values) between
526
+ local minima of the function being optimized. `basinhopping` will, by
527
+ default, adjust `stepsize` to find an optimal value, but this may take
528
+ many iterations. You will get quicker results if you set a sensible
529
+ initial value for ``stepsize``.
530
+
531
+ Choosing `T`: The parameter `T` is the "temperature" used in the
532
+ Metropolis criterion. Basinhopping steps are always accepted if
533
+ ``func(xnew) < func(xold)``. Otherwise, they are accepted with
534
+ probability::
535
+
536
+ exp( -(func(xnew) - func(xold)) / T )
537
+
538
+ So, for best results, `T` should be comparable to the typical
539
+ difference (in function values) between local minima. (The height of
540
+ "walls" between local minima is irrelevant.)
541
+
542
+ If `T` is 0, the algorithm becomes Monotonic Basin-Hopping, in which all
543
+ steps that increase energy are rejected.
544
+
545
+ .. versionadded:: 0.12.0
546
+
547
+ References
548
+ ----------
549
+ .. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
550
+ Cambridge, UK.
551
+ .. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
552
+ the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
553
+ 110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
554
+ .. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
555
+ multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
556
+ 1987, 84, 6611.
557
+ .. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
558
+ crystals, and biomolecules, Science, 1999, 285, 1368.
559
+ .. [5] Olson, B., Hashmi, I., Molloy, K., and Shehu, A., Basin Hopping as
560
+ a General and Versatile Optimization Framework for the Characterization
561
+ of Biological Macromolecules, Advances in Artificial Intelligence,
562
+ Volume 2012 (2012), Article ID 674832, :doi:`10.1155/2012/674832`
563
+
564
+ Examples
565
+ --------
566
+ The following example is a 1-D minimization problem, with many
567
+ local minima superimposed on a parabola.
568
+
569
+ >>> import numpy as np
570
+ >>> from scipy.optimize import basinhopping
571
+ >>> func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x
572
+ >>> x0 = [1.]
573
+
574
+ Basinhopping, internally, uses a local minimization algorithm. We will use
575
+ the parameter `minimizer_kwargs` to tell basinhopping which algorithm to
576
+ use and how to set up that minimizer. This parameter will be passed to
577
+ `scipy.optimize.minimize`.
578
+
579
+ >>> minimizer_kwargs = {"method": "BFGS"}
580
+ >>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
581
+ ... niter=200)
582
+ >>> # the global minimum is:
583
+ >>> ret.x, ret.fun
584
+ -0.1951, -1.0009
585
+
586
+ Next consider a 2-D minimization problem. This time we will also use
587
+ gradient information to significantly speed up the search.
588
+
589
+ >>> def func2d(x):
590
+ ... f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
591
+ ... 0.2) * x[0]
592
+ ... df = np.zeros(2)
593
+ ... df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
594
+ ... df[1] = 2. * x[1] + 0.2
595
+ ... return f, df
596
+
597
+ We'll also use a different local minimization algorithm, and we must tell
598
+ the minimizer that our function returns both energy and gradient (Jacobian).
599
+
600
+ >>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
601
+ >>> x0 = [1.0, 1.0]
602
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
603
+ ... niter=200)
604
+ >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
605
+ ... ret.x[1],
606
+ ... ret.fun))
607
+ global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109
608
+
609
+ Here is an example using a custom step-taking routine. Imagine you want
610
+ the first coordinate to take larger steps than the rest of the coordinates.
611
+ This can be implemented like so:
612
+
613
+ >>> class MyTakeStep:
614
+ ... def __init__(self, stepsize=0.5):
615
+ ... self.stepsize = stepsize
616
+ ... self.rng = np.random.default_rng()
617
+ ... def __call__(self, x):
618
+ ... s = self.stepsize
619
+ ... x[0] += self.rng.uniform(-2.*s, 2.*s)
620
+ ... x[1:] += self.rng.uniform(-s, s, x[1:].shape)
621
+ ... return x
622
+
623
+ Since ``MyTakeStep.stepsize`` exists, basinhopping will adjust the magnitude
624
+ of `stepsize` to optimize the search. We'll use the same 2-D function as
625
+ before
626
+
627
+ >>> mytakestep = MyTakeStep()
628
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
629
+ ... niter=200, take_step=mytakestep)
630
+ >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
631
+ ... ret.x[1],
632
+ ... ret.fun))
633
+ global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109
634
+
635
+ Now, let's do an example using a custom callback function which prints the
636
+ value of every minimum found
637
+
638
+ >>> def print_fun(x, f, accepted):
639
+ ... print("at minimum %.4f accepted %d" % (f, int(accepted)))
640
+
641
+ We'll run it for only 10 basinhopping steps this time.
642
+
643
+ >>> rng = np.random.default_rng()
644
+ >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
645
+ ... niter=10, callback=print_fun, seed=rng)
646
+ at minimum 0.4159 accepted 1
647
+ at minimum -0.4317 accepted 1
648
+ at minimum -1.0109 accepted 1
649
+ at minimum -0.9073 accepted 1
650
+ at minimum -0.4317 accepted 0
651
+ at minimum -0.1021 accepted 1
652
+ at minimum -0.7425 accepted 1
653
+ at minimum -0.9073 accepted 1
654
+ at minimum -0.4317 accepted 0
655
+ at minimum -0.7425 accepted 1
656
+ at minimum -0.9073 accepted 1
657
+
658
+ The minimum at -1.0109 is actually the global minimum, found here within
659
+ the first few iterations.
660
+
661
+ """ # numpy/numpydoc#87 # noqa: E501
662
+ if target_accept_rate <= 0. or target_accept_rate >= 1.:
663
+ raise ValueError('target_accept_rate has to be in range (0, 1)')
664
+ if stepwise_factor <= 0. or stepwise_factor >= 1.:
665
+ raise ValueError('stepwise_factor has to be in range (0, 1)')
666
+
667
+ x0 = np.array(x0)
668
+
669
+ # set up the np.random generator
670
+ rng = check_random_state(seed)
671
+
672
+ # set up minimizer
673
+ if minimizer_kwargs is None:
674
+ minimizer_kwargs = dict()
675
+ wrapped_minimizer = MinimizerWrapper(scipy.optimize.minimize, func,
676
+ **minimizer_kwargs)
677
+
678
+ # set up step-taking algorithm
679
+ if take_step is not None:
680
+ if not callable(take_step):
681
+ raise TypeError("take_step must be callable")
682
+ # if take_step.stepsize exists then use AdaptiveStepsize to control
683
+ # take_step.stepsize
684
+ if hasattr(take_step, "stepsize"):
685
+ take_step_wrapped = AdaptiveStepsize(
686
+ take_step, interval=interval,
687
+ accept_rate=target_accept_rate,
688
+ factor=stepwise_factor,
689
+ verbose=disp)
690
+ else:
691
+ take_step_wrapped = take_step
692
+ else:
693
+ # use default
694
+ displace = RandomDisplacement(stepsize=stepsize, random_gen=rng)
695
+ take_step_wrapped = AdaptiveStepsize(displace, interval=interval,
696
+ accept_rate=target_accept_rate,
697
+ factor=stepwise_factor,
698
+ verbose=disp)
699
+
700
+ # set up accept tests
701
+ accept_tests = []
702
+ if accept_test is not None:
703
+ if not callable(accept_test):
704
+ raise TypeError("accept_test must be callable")
705
+ accept_tests = [accept_test]
706
+
707
+ # use default
708
+ metropolis = Metropolis(T, random_gen=rng)
709
+ accept_tests.append(metropolis)
710
+
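+ # `count` below tracks iterations since the last new global minimum;
+ # with the default of niter + 2, `count > niter_success` can never hold,
+ # so the early-exit check is effectively disabled.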
711
+ if niter_success is None:
712
+ niter_success = niter + 2
713
+
714
+ bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
715
+ accept_tests, disp=disp)
716
+
717
+ # The wrapped minimizer is called once during construction of
718
+ # BasinHoppingRunner, so run the callback
719
+ if callable(callback):
720
+ callback(bh.storage.minres.x, bh.storage.minres.fun, True)
721
+
722
+ # start main iteration loop
723
+ count, i = 0, 0
724
+ message = ["requested number of basinhopping iterations completed"
725
+ " successfully"]
726
+ for i in range(niter):
727
+ new_global_min = bh.one_cycle()
728
+
729
+ if callable(callback):
730
+ # should we pass a copy of x?
731
+ val = callback(bh.xtrial, bh.energy_trial, bh.accept)
732
+ if val is not None:
733
+ if val:
734
+ message = ["callback function requested stop early by"
735
+ "returning True"]
736
+ break
737
+
738
+ count += 1
739
+ if new_global_min:
740
+ count = 0
741
+ elif count > niter_success:
742
+ message = ["success condition satisfied"]
743
+ break
744
+
745
+ # prepare return object
746
+ res = bh.res
747
+ res.lowest_optimization_result = bh.storage.get_lowest()
748
+ res.x = np.copy(res.lowest_optimization_result.x)
749
+ res.fun = res.lowest_optimization_result.fun
750
+ res.message = message
751
+ res.nit = i + 1
752
+ res.success = res.lowest_optimization_result.success
753
+ return res
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_bracket.py ADDED
@@ -0,0 +1,666 @@
1
+ import numpy as np
2
+ import scipy._lib._elementwise_iterative_method as eim
3
+ from scipy._lib._util import _RichResult
4
+
5
+ _ELIMITS = -1 # used in _bracket_root and _bracket_minimum
6
+ _ESTOPONESIDE = 2 # used in _bracket_root
7
+
8
+ def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter):
9
+
10
+ if not callable(func):
11
+ raise ValueError('`func` must be callable.')
12
+
13
+ if not np.iterable(args):
14
+ args = (args,)
15
+
16
+ xl0 = np.asarray(xl0)[()]
17
+ if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
18
+ raise ValueError('`xl0` must be numeric and real.')
19
+
20
+ xr0 = xl0 + 1 if xr0 is None else xr0
21
+ xmin = -np.inf if xmin is None else xmin
22
+ xmax = np.inf if xmax is None else xmax
23
+ factor = 2. if factor is None else factor
24
+ xl0, xr0, xmin, xmax, factor = np.broadcast_arrays(xl0, xr0, xmin, xmax, factor)
25
+
26
+ if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
27
+ raise ValueError('`xr0` must be numeric and real.')
28
+
29
+ if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
30
+ raise ValueError('`xmin` must be numeric and real.')
31
+
32
+ if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
33
+ raise ValueError('`xmax` must be numeric and real.')
34
+
35
+ if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
36
+ raise ValueError('`factor` must be numeric and real.')
37
+ if not np.all(factor > 1):
38
+ raise ValueError('All elements of `factor` must be greater than 1.')
39
+
40
+ maxiter = np.asarray(maxiter)
41
+ message = '`maxiter` must be a non-negative integer.'
42
+ if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
43
+ or np.iscomplex(maxiter)):
44
+ raise ValueError(message)
45
+ maxiter_int = int(maxiter[()])
46
+ if not maxiter == maxiter_int or maxiter < 0:
47
+ raise ValueError(message)
48
+
49
+ return func, xl0, xr0, xmin, xmax, factor, args, maxiter
50
+
51
+
52
+ def _bracket_root(func, xl0, xr0=None, *, xmin=None, xmax=None, factor=None,
53
+ args=(), maxiter=1000):
54
+ """Bracket the root of a monotonic scalar function of one variable
55
+
56
+ This function works elementwise when `xl0`, `xr0`, `xmin`, `xmax`, `factor`, and
57
+ the elements of `args` are broadcastable arrays.
58
+
59
+ Parameters
60
+ ----------
61
+ func : callable
62
+ The function for which the root is to be bracketed.
63
+ The signature must be::
64
+
65
+ func(x: ndarray, *args) -> ndarray
66
+
67
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
68
+ which may contain an arbitrary number of arrays that are broadcastable
69
+ with `x`. ``func`` must be an elementwise function: each element
70
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
71
+ xl0, xr0 : float array_like
72
+ Starting guess of bracket, which need not contain a root. If `xr0` is
73
+ not provided, ``xr0 = xl0 + 1``. Must be broadcastable with one another.
74
+ xmin, xmax : float array_like, optional
75
+ Minimum and maximum allowable endpoints of the bracket, inclusive. Must
76
+ be broadcastable with `xl0` and `xr0`.
77
+ factor : float array_like, default: 2
78
+ The factor used to grow the bracket. See notes for details.
79
+ args : tuple, optional
80
+ Additional positional arguments to be passed to `func`. Must be arrays
81
+ broadcastable with `xl0`, `xr0`, `xmin`, and `xmax`. If the callable to be
82
+ bracketed requires arguments that are not broadcastable with these
83
+ arrays, wrap that callable with `func` such that `func` accepts
84
+ only `x` and broadcastable arrays.
85
+ maxiter : int, optional
86
+ The maximum number of iterations of the algorithm to perform.
87
+
88
+ Returns
89
+ -------
90
+ res : _RichResult
91
+ An instance of `scipy._lib._util._RichResult` with the following
92
+ attributes. The descriptions are written as though the values will be
93
+ scalars; however, if `func` returns an array, the outputs will be
94
+ arrays of the same shape.
95
+
96
+ xl, xr : float
97
+ The lower and upper ends of the bracket, if the algorithm
98
+ terminated successfully.
99
+ fl, fr : float
100
+ The function value at the lower and upper ends of the bracket.
101
+ nfev : int
102
+ The number of function evaluations required to find the bracket.
103
+ This is distinct from the number of times `func` is *called*
104
+ because the function may evaluated at multiple points in a single
105
+ call.
106
+ nit : int
107
+ The number of iterations of the algorithm that were performed.
108
+ status : int
109
+ An integer representing the exit status of the algorithm.
110
+
111
+ - ``0`` : The algorithm produced a valid bracket.
112
+ - ``-1`` : The bracket expanded to the allowable limits without finding a sign change.
113
+ - ``-2`` : The maximum number of iterations was reached.
114
+ - ``-3`` : A non-finite value was encountered.
115
+ - ``-4`` : Iteration was terminated by `callback`.
116
+ - ``-5`` : The initial bracket does not satisfy ``xmin <= xl0 < xr0 <= xmax``.
117
+ - ``1`` : The algorithm is proceeding normally (in `callback` only).
118
+ - ``2`` : A bracket was found in the opposite search direction (in `callback` only).
119
+
120
+ success : bool
121
+ ``True`` when the algorithm terminated successfully (status ``0``).
122
+
123
+ Notes
124
+ -----
125
+ This function generalizes an algorithm found in pieces throughout
126
+ `scipy.stats`. The strategy is to iteratively grow the bracket `(l, r)`
127
+ until ``func(l)`` and ``func(r)`` differ in sign. The bracket grows to the left as follows.
128
+
129
+ - If `xmin` is not provided, the distance between `xl0` and `l` is iteratively
130
+ increased by `factor`.
131
+ - If `xmin` is provided, the distance between `xmin` and `l` is iteratively
132
+ decreased by `factor`. Note that this also *increases* the bracket size.
133
+
134
+ Growth of the bracket to the right is analogous.
135
+
136
+ Growth of the bracket in one direction stops when the endpoint is no longer
137
+ finite, the function value at the endpoint is no longer finite, or the
138
+ endpoint reaches its limiting value (`xmin` or `xmax`). Iteration terminates
139
+ when the bracket stops growing in both directions, the bracket surrounds
140
+ the root, or a root is found (accidentally).
141
+
142
+ If two brackets are found (that is, a bracket is found on both sides in
143
+ the same iteration), the smaller of the two is returned.
144
+ If roots of the function are found, both `l` and `r` are set to the
145
+ leftmost root.
146
+
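+ As a schematic illustration (not a doctest; `_bracket_root` is a private
+ helper, and exact outputs may vary)::
+
+     from scipy.optimize._bracket import _bracket_root
+     # f is increasing with a root at 1.5; the initial interval [0, 1]
+     # does not contain the root, so the bracket must grow rightward.
+     res = _bracket_root(lambda x: x - 1.5, xl0=0.0, xr0=1.0)
+     # on success, the returned endpoints straddle the root
+     assert res.success and res.xl <= 1.5 <= res.xr
+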
147
+ """ # noqa: E501
148
+ # Todo:
149
+ # - find bracket with sign change in specified direction
150
+ # - Add tolerance
151
+ # - allow factor < 1?
152
+
153
+ callback = None # supported by the machinery below, but deliberately disabled and untested
154
+ temp = _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter)
155
+ func, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
156
+
157
+ xs = (xl0, xr0)
158
+ temp = eim._initialize(func, xs, args)
159
+ func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
160
+ xl0, xr0 = xs
161
+ xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
162
+ xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
163
+ invalid_bracket = ~((xmin <= xl0) & (xl0 < xr0) & (xr0 <= xmax))
164
+
165
+ # The approach is to treat the left and right searches as though they were
166
+ # (almost) totally independent one-sided bracket searches. (The interaction
167
+ # is considered when checking for termination and preparing the result
168
+ # object.)
169
+ # `x` is the "moving" end of the bracket
170
+ x = np.concatenate(xs)
171
+ f = np.concatenate(fs)
172
+ invalid_bracket = np.concatenate((invalid_bracket, invalid_bracket))
173
+ n = len(x) // 2
174
+
175
+ # `x_last` is the previous location of the moving end of the bracket. If
176
+ # the signs of `f` and `f_last` are different, `x` and `x_last` form a
177
+ # bracket.
178
+ x_last = np.concatenate((x[n:], x[:n]))
179
+ f_last = np.concatenate((f[n:], f[:n]))
180
+ # `x0` is the "fixed" end of the bracket.
181
+ x0 = x_last
182
+ # We don't need to retain the corresponding function value, since the
183
+ # fixed end of the bracket is only needed to compute the new value of the
184
+ # moving end; it is never returned.
185
+ limit = np.concatenate((xmin, xmax))
186
+
187
+ factor = np.broadcast_to(factor, shape).astype(dtype, copy=False).ravel()
188
+ factor = np.concatenate((factor, factor))
189
+
190
+ active = np.arange(2*n)
191
+ args = [np.concatenate((arg, arg)) for arg in args]
192
+
193
+ # This is needed due to inner workings of `eim._loop`.
194
+ # We're abusing it a tiny bit.
195
+ shape = shape + (2,)
196
+
197
+ # `d` is for "distance".
198
+ # For searches without a limit, the distance between the fixed end of the
199
+ # bracket `x0` and the moving end `x` will grow by `factor` each iteration.
200
+ # For searches with a limit, the distance between the `limit` and moving
201
+ # end of the bracket `x` will shrink by `factor` each iteration.
202
+ i = np.isinf(limit)
203
+ ni = ~i
204
+ d = np.zeros_like(x)
205
+ d[i] = x[i] - x0[i]
206
+ d[ni] = limit[ni] - x[ni]
207
+
208
+ status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
209
+ status[invalid_bracket] = eim._EINPUTERR
210
+ nit, nfev = 0, 1 # one function evaluation per side performed above
211
+
212
+ work = _RichResult(x=x, x0=x0, f=f, limit=limit, factor=factor,
213
+ active=active, d=d, x_last=x_last, f_last=f_last,
214
+ nit=nit, nfev=nfev, status=status, args=args,
215
+ xl=None, xr=None, fl=None, fr=None, n=n)
216
+ res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xr', 'xr'),
217
+ ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'),
218
+ ('fr', 'fr'), ('x', 'x'), ('f', 'f'),
219
+ ('x_last', 'x_last'), ('f_last', 'f_last')]
220
+
221
+ def pre_func_eval(work):
222
+ # Initialize moving end of bracket
223
+ x = np.zeros_like(work.x)
224
+
225
+ # Unlimited brackets grow by `factor` by increasing distance from fixed
226
+ # end to moving end.
227
+ i = np.isinf(work.limit) # indices of unlimited brackets
228
+ work.d[i] *= work.factor[i]
229
+ x[i] = work.x0[i] + work.d[i]
230
+
231
+ # Limited brackets grow by decreasing the distance from the limit to
232
+ # the moving end.
233
+ ni = ~i # indices of limited brackets
234
+ work.d[ni] /= work.factor[ni]
235
+ x[ni] = work.limit[ni] - work.d[ni]
236
+
237
+ return x
238
+
239
+ def post_func_eval(x, f, work):
240
+ # Keep track of the previous location of the moving end so that we can
241
+ # return a narrower bracket. (The alternative is to remember the
242
+ # original fixed end, but then the bracket would be wider than needed.)
243
+ work.x_last = work.x
244
+ work.f_last = work.f
245
+ work.x = x
246
+ work.f = f
247
+
248
+ def check_termination(work):
249
+ # Condition 0: initial bracket is invalid
250
+ stop = (work.status == eim._EINPUTERR)
251
+
252
+ # Condition 1: a valid bracket (or the root itself) has been found
253
+ sf = np.sign(work.f)
254
+ sf_last = np.sign(work.f_last)
255
+ i = ((sf_last == -sf) | (sf_last == 0) | (sf == 0)) & ~stop
256
+ work.status[i] = eim._ECONVERGED
257
+ stop[i] = True
258
+
259
+ # Condition 2: the other side's search found a valid bracket.
260
+ # (If we just found a bracket with the rightward search, we can stop
261
+ # the leftward search, and vice-versa.)
262
+ # To do this, we need to set the status of the other side's search;
263
+ # this is tricky because `work.status` contains only the *active*
264
+ # elements, so we don't immediately know the index of the element we
265
+ # need to set - or even if it's still there. (That search may have
266
+ # terminated already, e.g. by reaching its `limit`.)
267
+ # To facilitate this, `work.active` contains a unique integer index of
268
+ # each search. Indices `k` (`k < n`) and `k + n` correspond with a
269
+ # leftward and rightward search, respectively. Elements are removed
270
+ # from `work.active` just as they are removed from `work.status`, so
271
+ # we use `work.active` to help find the right location in
272
+ # `work.status`.
273
+ # Get the integer indices of the elements that can also stop
274
+ also_stop = (work.active[i] + work.n) % (2*work.n)
275
+ # Check whether they are still active.
276
+ # To start, we need to find out where in `work.active` they would
277
+ # appear if they are indeed there.
278
+ j = np.searchsorted(work.active, also_stop)
279
+ # If the location exceeds the length of `work.active`, they are
280
+ # not there.
281
+ j = j[j < len(work.active)]
282
+ # Check whether they are still there.
283
+ j = j[also_stop == work.active[j]]
284
+ # Now convert these to boolean indices to use with `work.status`.
285
+ i = np.zeros_like(stop)
286
+ i[j] = True # boolean indices of elements that can also stop
287
+ i = i & ~stop
288
+ work.status[i] = _ESTOPONESIDE
289
+ stop[i] = True
290
+
291
+ # Condition 3: moving end of bracket reaches limit
292
+ i = (work.x == work.limit) & ~stop
293
+ work.status[i] = _ELIMITS
294
+ stop[i] = True
295
+
296
+ # Condition 4: non-finite value encountered
297
+ i = ~(np.isfinite(work.x) & np.isfinite(work.f)) & ~stop
298
+ work.status[i] = eim._EVALUEERR
299
+ stop[i] = True
300
+
301
+ return stop
302
+
303
+ def post_termination_check(work):
304
+ pass
305
+
306
+ def customize_result(res, shape):
307
+ n = len(res['x']) // 2
308
+
309
+ # To avoid ambiguity, below we refer to `xl0`, the initial left endpoint,
310
+ # as `a`, and `xr0`, the initial right endpoint, as `b`.
311
+ # Because we treat the two one-sided searches as though they were
312
+ # independent, what we keep track of in `work` and what we want to
313
+ # return in `res` look quite different. Combine the results from the
314
+ # two one-sided searches before reporting the results to the user.
315
+ # - "a" refers to the leftward search (the moving end started at `a`)
316
+ # - "b" refers to the rightward search (the moving end started at `b`)
317
+ # - "l" refers to the left end of the bracket (closer to -oo)
318
+ # - "r" refers to the right end of the bracket (closer to +oo)
319
+ xal = res['x'][:n]
320
+ xar = res['x_last'][:n]
321
+ xbl = res['x_last'][n:]
322
+ xbr = res['x'][n:]
323
+
324
+ fal = res['f'][:n]
325
+ far = res['f_last'][:n]
326
+ fbl = res['f_last'][n:]
327
+ fbr = res['f'][n:]
328
+
329
+ # Initialize the brackets and corresponding function values to return
330
+ # to the user. Brackets may not be valid (e.g. there is no root,
331
+ # there weren't enough iterations, NaN encountered), but we still need
332
+ # to return something. One option would be all NaNs, but what I've
333
+ # chosen here is the left- and right-most points at which the function
334
+ # has been evaluated. This gives the user some information about what
335
+ # interval of the real line has been searched and shows that there is
336
+ # no sign change between the two ends.
337
+ xl = xal.copy()
338
+ fl = fal.copy()
339
+ xr = xbr.copy()
340
+ fr = fbr.copy()
341
+
342
+ # `status` indicates whether the bracket is valid or not. If so,
343
+ # we want to adjust the bracket we return to be the narrowest possible
344
+ # given the points at which we evaluated the function.
345
+ # For example if bracket "a" is valid and smaller than bracket "b" OR
346
+ # if bracket "a" is valid and bracket "b" is not valid, we want to
347
+ # return bracket "a" (and vice versa).
348
+ sa = res['status'][:n]
349
+ sb = res['status'][n:]
350
+
351
+ da = xar - xal
352
+ db = xbr - xbl
353
+
354
+ i1 = ((da <= db) & (sa == 0)) | ((sa == 0) & (sb != 0))
355
+ i2 = ((db <= da) & (sb == 0)) | ((sb == 0) & (sa != 0))
356
+
357
+ xr[i1] = xar[i1]
358
+ fr[i1] = far[i1]
359
+ xl[i2] = xbl[i2]
360
+ fl[i2] = fbl[i2]
361
+
362
+ # Finish assembling the result object
363
+ res['xl'] = xl
364
+ res['xr'] = xr
365
+ res['fl'] = fl
366
+ res['fr'] = fr
367
+
368
+ res['nit'] = np.maximum(res['nit'][:n], res['nit'][n:])
369
+ res['nfev'] = res['nfev'][:n] + res['nfev'][n:]
370
+ # If the status on one side is zero, the status is zero. In any case,
371
+ # report the status from one side only.
372
+ res['status'] = np.choose(sa == 0, (sb, sa))
373
+ res['success'] = (res['status'] == 0)
374
+
375
+ del res['x']
376
+ del res['f']
377
+ del res['x_last']
378
+ del res['f_last']
379
+
380
+ return shape[:-1]
381
+
382
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
383
+ pre_func_eval, post_func_eval, check_termination,
384
+ post_termination_check, customize_result, res_work_pairs,
385
+ xp)
386
+
387
+
388
+ def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter):
389
+
390
+ if not callable(func):
391
+ raise ValueError('`func` must be callable.')
392
+
393
+ if not np.iterable(args):
394
+ args = (args,)
395
+
396
+ xm0 = np.asarray(xm0)[()]
397
+ if not np.issubdtype(xm0.dtype, np.number) or np.iscomplex(xm0).any():
398
+ raise ValueError('`xm0` must be numeric and real.')
399
+
400
+ xmin = -np.inf if xmin is None else xmin
401
+ xmax = np.inf if xmax is None else xmax
402
+
403
+ # If xl0 (xr0) is not supplied, fill with a dummy value for the sake
404
+ # of broadcasting. We need to wait until xmin (xmax) has been validated
405
+ # to compute the default values.
406
+ xl0_not_supplied = False
407
+ if xl0 is None:
408
+ xl0 = np.nan
409
+ xl0_not_supplied = True
410
+
411
+ xr0_not_supplied = False
412
+ if xr0 is None:
413
+ xr0 = np.nan
414
+ xr0_not_supplied = True
415
+
416
+ factor = 2.0 if factor is None else factor
417
+ xl0, xm0, xr0, xmin, xmax, factor = np.broadcast_arrays(
418
+ xl0, xm0, xr0, xmin, xmax, factor
419
+ )
420
+
421
+ if not np.issubdtype(xl0.dtype, np.number) or np.iscomplex(xl0).any():
422
+ raise ValueError('`xl0` must be numeric and real.')
423
+
424
+ if not np.issubdtype(xr0.dtype, np.number) or np.iscomplex(xr0).any():
425
+ raise ValueError('`xr0` must be numeric and real.')
426
+
427
+ if not np.issubdtype(xmin.dtype, np.number) or np.iscomplex(xmin).any():
428
+ raise ValueError('`xmin` must be numeric and real.')
429
+
430
+ if not np.issubdtype(xmax.dtype, np.number) or np.iscomplex(xmax).any():
431
+ raise ValueError('`xmax` must be numeric and real.')
432
+
433
+ if not np.issubdtype(factor.dtype, np.number) or np.iscomplex(factor).any():
434
+ raise ValueError('`factor` must be numeric and real.')
435
+ if not np.all(factor > 1):
436
+ raise ValueError('All elements of `factor` must be greater than 1.')
437
+
438
+ # Calculate default values of xl0 and/or xr0 if they have not been supplied
439
+ # by the user. We need to be careful to ensure xl0 and xr0 are not outside
440
+ # of (xmin, xmax).
441
+ if xl0_not_supplied:
442
+ xl0 = xm0 - np.minimum((xm0 - xmin)/16, 0.5)
443
+ if xr0_not_supplied:
444
+ xr0 = xm0 + np.minimum((xmax - xm0)/16, 0.5)
445
+
446
+ maxiter = np.asarray(maxiter)
447
+ message = '`maxiter` must be a non-negative integer.'
448
+ if (not np.issubdtype(maxiter.dtype, np.number) or maxiter.shape != tuple()
449
+ or np.iscomplex(maxiter)):
450
+ raise ValueError(message)
451
+ maxiter_int = int(maxiter[()])
452
+ if not maxiter == maxiter_int or maxiter < 0:
453
+ raise ValueError(message)
454
+
455
+ return func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter
456
+
457
+
458
+ def _bracket_minimum(func, xm0, *, xl0=None, xr0=None, xmin=None, xmax=None,
459
+ factor=None, args=(), maxiter=1000):
460
+ """Bracket the minimum of a unimodal scalar function of one variable
461
+
462
+ This function works elementwise when `xm0`, `xl0`, `xr0`, `xmin`, `xmax`,
463
+ and the elements of `args` are broadcastable arrays.
464
+
465
+ Parameters
466
+ ----------
467
+ func : callable
468
+ The function for which the minimum is to be bracketed.
469
+ The signature must be::
470
+
471
+ func(x: ndarray, *args) -> ndarray
472
+
473
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
474
+ which may contain an arbitrary number of arrays that are broadcastable
475
+ with ``x``. `func` must be an elementwise function: each element
476
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices `i`.
477
+ xm0 : float array_like
478
+ Starting guess for middle point of bracket.
479
+ xl0, xr0 : float array_like, optional
480
+ Starting guesses for left and right endpoints of the bracket. Must be
481
+ broadcastable with one another and with `xm0`.
482
+ xmin, xmax : float array_like, optional
483
+ Minimum and maximum allowable endpoints of the bracket, inclusive. Must
484
+ be broadcastable with `xl0`, `xm0`, and `xr0`.
485
+ factor : float array_like, optional
486
+ Controls expansion of bracket endpoint in downhill direction. Works
487
+ differently in the cases where a limit is set in the downhill direction
488
+ with `xmax` or `xmin`. See Notes.
489
+ args : tuple, optional
490
+ Additional positional arguments to be passed to `func`. Must be arrays
491
+ broadcastable with `xl0`, `xm0`, `xr0`, `xmin`, and `xmax`. If the
492
+ callable to be bracketed requires arguments that are not broadcastable
493
+ with these arrays, wrap that callable with `func` such that `func`
494
+ accepts only ``x`` and broadcastable arrays.
495
+ maxiter : int, optional
496
+ The maximum number of iterations of the algorithm to perform. The number
497
+ of function evaluations is three greater than the number of iterations.
498
+
499
+ Returns
500
+ -------
501
+ res : _RichResult
502
+ An instance of `scipy._lib._util._RichResult` with the following
503
+ attributes. The descriptions are written as though the values will be
504
+ scalars; however, if `func` returns an array, the outputs will be
505
+ arrays of the same shape.
506
+
507
+ xl, xm, xr : float
508
+ The left, middle, and right points of the bracket, if the algorithm
509
+ terminated successfully.
510
+ fl, fm, fr : float
511
+ The function value at the left, middle, and right points of the bracket.
512
+ nfev : int
513
+ The number of function evaluations required to find the bracket.
514
+ nit : int
515
+ The number of iterations of the algorithm that were performed.
516
+ status : int
517
+ An integer representing the exit status of the algorithm.
518
+
519
+ - ``0`` : The algorithm produced a valid bracket.
520
+ - ``-1`` : The bracket expanded to the allowable limits. Assuming
521
+ unimodality, this implies the endpoint at the limit is a
522
+ minimizer.
523
+ - ``-2`` : The maximum number of iterations was reached.
524
+ - ``-3`` : A non-finite value was encountered.
525
+ - ``-4`` : Iteration was terminated by `callback` (currently unreachable, as `callback` is disabled).
526
+ - ``-5`` : The initial bracket does not satisfy
527
+ ``xmin <= xl0 < xm0 < xr0 <= xmax``.
528
+
529
+ success : bool
530
+ ``True`` when the algorithm terminated successfully (status ``0``).
531
+
532
+ Notes
533
+ -----
534
+ Similar to `scipy.optimize.bracket`, this function seeks to find real
535
+ points ``xl < xm < xr`` such that ``f(xl) >= f(xm)`` and ``f(xr) >= f(xm)``,
536
+ where at least one of the inequalities is strict. Unlike `scipy.optimize.bracket`,
537
+ this function can operate in a vectorized manner on array input, so long as
538
+ the input arrays are broadcastable with each other. Also unlike
539
+ `scipy.optimize.bracket`, users may specify minimum and maximum endpoints
540
+ for the desired bracket.
541
+
542
+ Given an initial trio of points ``xl = xl0``, ``xm = xm0``, ``xr = xr0``,
543
+ the algorithm checks if these points already give a valid bracket. If not,
544
+ a new endpoint, ``w``, is chosen in the "downhill" direction, ``xm`` becomes the new
545
+ opposite endpoint, and either `xl` or `xr` becomes the new middle point,
546
+ depending on which direction is downhill. The algorithm repeats from here.
547
+
548
+ The new endpoint `w` is chosen differently depending on whether or not a
549
+ boundary `xmin` or `xmax` has been set in the downhill direction. Without
550
+ loss of generality, suppose the downhill direction is to the right, so that
551
+ ``f(xl) > f(xm) > f(xr)``. If there is no boundary to the right, then `w`
552
+ is chosen to be ``xr + factor * (xr - xm)`` where `factor` is controlled by
553
+ the user (defaults to 2.0) so that step sizes increase in geometric proportion.
554
+ If there is a boundary, `xmax` in this case, then `w` is chosen to be
555
+ ``xmax - (xmax - xr)/factor``, with steps slowing to a stop at
556
+ `xmax`. This cautious approach ensures that a minimum near but distinct from
557
+ the boundary isn't missed while also detecting whether or not `xmax` is
558
+ a minimizer when `xmax` is reached after a finite number of steps.
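+
+ As a schematic illustration (not a doctest; `_bracket_minimum` is a
+ private helper, and exact outputs may vary)::
+
+     from scipy.optimize._bracket import _bracket_minimum
+     # quadratic with its minimum at 1.5, starting from a middle guess of 0
+     res = _bracket_minimum(lambda x: (x - 1.5)**2, 0.0)
+     # on success, the bracket satisfies f(xl) >= f(xm) <= f(xr)
+     assert res.success and res.xl < 1.5 < res.xr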
559
+ """ # noqa: E501
560
+ callback = None # supported by the machinery below, but deliberately disabled and untested
561
+
562
+ temp = _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter)
563
+ func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter = temp
564
+
565
+ xs = (xl0, xm0, xr0)
566
+ temp = eim._initialize(func, xs, args)
567
+ func, xs, fs, args, shape, dtype, xp = temp
568
+
569
+ xl0, xm0, xr0 = xs
570
+ fl0, fm0, fr0 = fs
571
+ xmin = np.broadcast_to(xmin, shape).astype(dtype, copy=False).ravel()
572
+ xmax = np.broadcast_to(xmax, shape).astype(dtype, copy=False).ravel()
573
+ invalid_bracket = ~((xmin <= xl0) & (xl0 < xm0) & (xm0 < xr0) & (xr0 <= xmax))
574
+ # We will modify factor later on so make a copy. np.broadcast_to returns
575
+ # a read-only view.
576
+ factor = np.broadcast_to(factor, shape).astype(dtype, copy=True).ravel()
577
+
578
+ # To simplify the logic, swap xl and xr if f(xl) < f(xr). We should always be
579
+ # marching downhill in the direction from xl to xr.
580
+ comp = fl0 < fr0
581
+ xl0[comp], xr0[comp] = xr0[comp], xl0[comp]
582
+ fl0[comp], fr0[comp] = fr0[comp], fl0[comp]
583
+ # We only need the boundary in the direction we're traveling.
584
+ limit = np.where(comp, xmin, xmax)
585
+
586
+ unlimited = np.isinf(limit)
587
+ limited = ~unlimited
588
+ step = np.empty_like(xl0)
589
+
590
+ step[unlimited] = (xr0[unlimited] - xm0[unlimited])
591
+ step[limited] = (limit[limited] - xr0[limited])
592
+
593
+ # Step size is divided by factor for case where there is a limit.
594
+ factor[limited] = 1 / factor[limited]
595
+
596
+ status = np.full_like(xl0, eim._EINPROGRESS, dtype=int)
597
+ status[invalid_bracket] = eim._EINPUTERR
598
+ nit, nfev = 0, 3
599
+
600
+ work = _RichResult(xl=xl0, xm=xm0, xr=xr0, xr0=xr0, fl=fl0, fm=fm0, fr=fr0,
601
+ step=step, limit=limit, limited=limited, factor=factor, nit=nit,
602
+ nfev=nfev, status=status, args=args)
603
+
604
+ res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xm', 'xm'), ('xr', 'xr'),
605
+ ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'), ('fm', 'fm'),
606
+ ('fr', 'fr')]
607
+
608
+ def pre_func_eval(work):
609
+ work.step *= work.factor
610
+ x = np.empty_like(work.xr)
611
+ x[~work.limited] = work.xr0[~work.limited] + work.step[~work.limited]
612
+ x[work.limited] = work.limit[work.limited] - work.step[work.limited]
613
+ # Since the new bracket endpoint is calculated from an offset with the
614
+ # limit, it may be the case that the new endpoint equals the old endpoint,
615
+ # when the old endpoint is sufficiently close to the limit. We use the
616
+ # limit itself as the new endpoint in these cases.
617
+ x[work.limited] = np.where(
618
+ x[work.limited] == work.xr[work.limited],
619
+ work.limit[work.limited],
620
+ x[work.limited],
621
+ )
622
+ return x
623
+
624
+ def post_func_eval(x, f, work):
625
+ work.xl, work.xm, work.xr = work.xm, work.xr, x
626
+ work.fl, work.fm, work.fr = work.fm, work.fr, f
627
+
628
+ def check_termination(work):
629
+ # Condition 0: Initial bracket is invalid.
630
+ stop = (work.status == eim._EINPUTERR)
631
+
632
+ # Condition 1: A valid bracket has been found.
633
+ i = (
634
+ (work.fl >= work.fm) & (work.fr > work.fm)
635
+ | (work.fl > work.fm) & (work.fr >= work.fm)
636
+ ) & ~stop
637
+ work.status[i] = eim._ECONVERGED
638
+ stop[i] = True
639
+
640
+ # Condition 2: Moving end of bracket reaches limit.
641
+ i = (work.xr == work.limit) & ~stop
642
+ work.status[i] = _ELIMITS
643
+ stop[i] = True
644
+
645
+ # Condition 3: non-finite value encountered
646
+ i = ~(np.isfinite(work.xr) & np.isfinite(work.fr)) & ~stop
647
+ work.status[i] = eim._EVALUEERR
648
+ stop[i] = True
649
+
650
+ return stop
651
+
652
+ def post_termination_check(work):
653
+ pass
654
+
655
+ def customize_result(res, shape):
656
+ # Reorder entries of xl and xr if they were swapped due to f(xl0) < f(xr0).
657
+ comp = res['xl'] > res['xr']
658
+ res['xl'][comp], res['xr'][comp] = res['xr'][comp], res['xl'][comp]
659
+ res['fl'][comp], res['fr'][comp] = res['fr'][comp], res['fl'][comp]
660
+ return shape
661
+
662
+ return eim._loop(work, callback, shape,
663
+ maxiter, func, args, dtype,
664
+ pre_func_eval, post_func_eval,
665
+ check_termination, post_termination_check,
666
+ customize_result, res_work_pairs, xp)
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_chandrupatla.py ADDED
@@ -0,0 +1,549 @@
1
+ import math
2
+ import numpy as np
3
+ import scipy._lib._elementwise_iterative_method as eim
4
+ from scipy._lib._util import _RichResult
5
+ from scipy._lib._array_api import xp_clip, xp_minimum, xp_sign
6
+
7
+ # TODO:
8
+ # - (maybe?) don't use fancy indexing assignment
9
+ # - figure out how to replace the new `try`/`except`s
10
+
11
+
12
+ def _chandrupatla(func, a, b, *, args=(), xatol=None, xrtol=None,
13
+ fatol=None, frtol=0, maxiter=None, callback=None):
14
+ """Find the root of an elementwise function using Chandrupatla's algorithm.
15
+
16
+ For each element of the output of `func`, `_chandrupatla` seeks the scalar
17
+ root that makes the element 0. This function allows for `a`, `b`, and the
18
+ output of `func` to be of any broadcastable shapes.
19
+
20
+ Parameters
21
+ ----------
22
+ func : callable
23
+ The function whose root is desired. The signature must be::
24
+
25
+ func(x: ndarray, *args) -> ndarray
26
+
27
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
28
+ which may contain an arbitrary number of components of any type(s).
29
+ ``func`` must be an elementwise function: each element ``func(x)[i]``
30
+ must equal ``func(x[i])`` for all indices ``i``. `_chandrupatla`
31
+ seeks an array ``x`` such that ``func(x)`` is an array of zeros.
32
+ a, b : array_like
33
+ The lower and upper bounds of the root of the function. Must be
34
+ broadcastable with one another.
35
+ args : tuple, optional
36
+ Additional positional arguments to be passed to `func`.
37
+ xatol, xrtol, fatol, frtol : float, optional
38
+ Absolute and relative tolerances on the root and function value.
39
+ See Notes for details.
40
+ maxiter : int, optional
41
+ The maximum number of iterations of the algorithm to perform.
42
+ The default is the maximum possible number of bisections within
43
+ the (normal) floating point numbers of the relevant dtype.
44
+ callback : callable, optional
45
+ An optional user-supplied function to be called before the first
46
+ iteration and after each iteration.
47
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
48
+ similar to that returned by `_chandrupatla` (but containing the current
49
+ iterate's values of all variables). If `callback` raises a
50
+ ``StopIteration``, the algorithm will terminate immediately and
51
+ `_chandrupatla` will return a result.
52
+
53
+ Returns
54
+ -------
55
+ res : _RichResult
56
+ An instance of `scipy._lib._util._RichResult` with the following
57
+ attributes. The descriptions are written as though the values will be
58
+ scalars; however, if `func` returns an array, the outputs will be
59
+ arrays of the same shape.
60
+
61
+ x : float
62
+ The root of the function, if the algorithm terminated successfully.
63
+ nfev : int
64
+ The number of times the function was called to find the root.
65
+ nit : int
66
+ The number of iterations of Chandrupatla's algorithm performed.
67
+ status : int
68
+ An integer representing the exit status of the algorithm.
69
+ ``0`` : The algorithm converged to the specified tolerances.
70
+ ``-1`` : The algorithm encountered an invalid bracket.
71
+ ``-2`` : The maximum number of iterations was reached.
72
+ ``-3`` : A non-finite value was encountered.
73
+ ``-4`` : Iteration was terminated by `callback`.
74
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
75
+ success : bool
76
+ ``True`` when the algorithm terminated successfully (status ``0``).
77
+ fun : float
78
+ The value of `func` evaluated at `x`.
79
+ xl, xr : float
80
+ The lower and upper ends of the bracket.
81
+ fl, fr : float
82
+ The function value at the lower and upper ends of the bracket.
83
+
84
+ Notes
85
+ -----
86
+ Implemented based on Chandrupatla's original paper [1]_.
87
+
88
+ If ``xl`` and ``xr`` are the left and right ends of the bracket,
89
+ ``xmin = xl if abs(func(xl)) <= abs(func(xr)) else xr``,
90
+ and ``fmin0 = min(abs(func(a)), abs(func(b)))``, then the algorithm is
91
+ considered to have converged when ``abs(xr - xl) < xatol + abs(xmin) * xrtol``
92
+ or ``abs(func(xmin)) <= fatol + fmin0 * frtol``. This is equivalent to the
93
+ termination condition described in [1]_ with ``xrtol = 4e-10``,
94
+ ``xatol = 1e-5``, and ``fatol = frtol = 0``. The default values are
95
+ ``xatol = 4*tiny``, ``xrtol = 4*eps``, ``frtol = 0``, and ``fatol = tiny``,
96
+ where ``eps`` and ``tiny`` are the precision and smallest normal number
97
+ of the result ``dtype`` of function inputs and outputs.
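+
+ For instance, to terminate on bracket width alone (illustrative only;
+ `f`, `a`, and `b` are hypothetical)::
+
+     res = _chandrupatla(f, a, b, xatol=1e-6, xrtol=0, fatol=0, frtol=0)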
98
+
99
+ References
100
+ ----------
101
+
102
+ .. [1] Chandrupatla, Tirupathi R.
103
+ "A new hybrid quadratic/bisection algorithm for finding the zero of a
104
+ nonlinear function without using derivatives".
105
+ Advances in Engineering Software, 28(3), 145-149.
106
+ https://doi.org/10.1016/s0965-9978(96)00051-8
107
+
108
+ See Also
109
+ --------
110
+ brentq, brenth, ridder, bisect, newton
111
+
112
+ Examples
113
+ --------
114
+ >>> from scipy import optimize
115
+ >>> def f(x, c):
116
+ ... return x**3 - 2*x - c
117
+ >>> c = 5
118
+ >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
119
+ >>> res.x
120
+ 2.0945514818937463
121
+
122
+ >>> c = [3, 4, 5]
123
+ >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
124
+ >>> res.x
125
+ array([1.8932892 , 2. , 2.09455148])
126
+
127
+ """
128
+ res = _chandrupatla_iv(func, args, xatol, xrtol,
129
+ fatol, frtol, maxiter, callback)
130
+ func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res
131
+
132
+ # Initialization
133
+ temp = eim._initialize(func, (a, b), args)
134
+ func, xs, fs, args, shape, dtype, xp = temp
135
+ x1, x2 = xs
136
+ f1, f2 = fs
137
+ status = xp.full_like(x1, eim._EINPROGRESS, dtype=xp.int32) # in progress
138
+ nit, nfev = 0, 2 # two function evaluations performed above
139
+ finfo = xp.finfo(dtype)
140
+ xatol = 4*finfo.smallest_normal if xatol is None else xatol
141
+ xrtol = 4*finfo.eps if xrtol is None else xrtol
142
+ fatol = finfo.smallest_normal if fatol is None else fatol
143
+ frtol = frtol * xp_minimum(xp.abs(f1), xp.abs(f2))
144
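+ # default maxiter: the number of halvings needed to take a bracket from
+ # the largest to the smallest normal float, beyond which bisection
+ # cannot refine the result further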
+ maxiter = (math.log2(finfo.max) - math.log2(finfo.smallest_normal)
145
+ if maxiter is None else maxiter)
146
+ work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=None, f3=None, t=0.5,
147
+ xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
148
+ nit=nit, nfev=nfev, status=status)
149
+ res_work_pairs = [('status', 'status'), ('x', 'xmin'), ('fun', 'fmin'),
150
+ ('nit', 'nit'), ('nfev', 'nfev'), ('xl', 'x1'),
151
+ ('fl', 'f1'), ('xr', 'x2'), ('fr', 'f2')]
152
+
153
+ def pre_func_eval(work):
154
+ # [1] Figure 1 (first box)
155
+ x = work.x1 + work.t * (work.x2 - work.x1)
156
+ return x
157
+
158
+ def post_func_eval(x, f, work):
159
+ # [1] Figure 1 (first diamond and boxes)
160
+ # Note: y/n are reversed in figure; compare to BASIC in appendix
161
+ work.x3, work.f3 = (xp.asarray(work.x2, copy=True),
162
+ xp.asarray(work.f2, copy=True))
163
+ j = xp.sign(f) == xp.sign(work.f1)
164
+ nj = ~j
165
+ work.x3[j], work.f3[j] = work.x1[j], work.f1[j]
166
+ work.x2[nj], work.f2[nj] = work.x1[nj], work.f1[nj]
167
+ work.x1, work.f1 = x, f
168
+
169
+ def check_termination(work):
170
+ # [1] Figure 1 (second diamond)
171
+ # Check for all terminal conditions and record statuses.
172
+
173
+ # See [1] Section 4 (first two sentences)
174
+ i = xp.abs(work.f1) < xp.abs(work.f2)
175
+ work.xmin = xp.where(i, work.x1, work.x2)
176
+ work.fmin = xp.where(i, work.f1, work.f2)
177
+ stop = xp.zeros_like(work.x1, dtype=xp.bool) # termination condition met
178
+
179
+ # If function value tolerance is met, report successful convergence,
180
+ # regardless of other conditions. Note that `frtol` has been redefined
181
+ # as `frtol = frtol * minimum(abs(f1), abs(f2))`, where `f1` and `f2` are the
182
+ # function evaluated at the original ends of the bracket.
183
+ i = xp.abs(work.fmin) <= work.fatol + work.frtol
184
+ work.status[i] = eim._ECONVERGED
185
+ stop[i] = True
186
+
187
+ # If the bracket is no longer valid, report failure (unless a function
188
+ # tolerance is met, as detected above).
189
+ i = (xp_sign(work.f1) == xp_sign(work.f2)) & ~stop
190
+ NaN = xp.asarray(xp.nan, dtype=work.xmin.dtype)
191
+ work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._ESIGNERR
192
+ stop[i] = True
193
+
194
+ # If the abscissae are non-finite or either function value is NaN,
195
+ # report failure.
196
+ x_nonfinite = ~(xp.isfinite(work.x1) & xp.isfinite(work.x2))
197
+ f_nan = xp.isnan(work.f1) | xp.isnan(work.f2)
198
+ i = (x_nonfinite | f_nan) & ~stop
199
+ work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._EVALUEERR
200
+ stop[i] = True
201
+
202
+ # This is the convergence criterion used in bisect. Chandrupatla's
203
+ # criterion is equivalent to this except with a factor of 4 on `xrtol`.
204
+ work.dx = xp.abs(work.x2 - work.x1)
205
+ work.tol = xp.abs(work.xmin) * work.xrtol + work.xatol
206
+ i = work.dx < work.tol
207
+ work.status[i] = eim._ECONVERGED
208
+ stop[i] = True
209
+
210
+ return stop
211
+
212
+ def post_termination_check(work):
213
+ # [1] Figure 1 (third diamond and boxes / Equation 1)
214
+ xi1 = (work.x1 - work.x2) / (work.x3 - work.x2)
215
+ phi1 = (work.f1 - work.f2) / (work.f3 - work.f2)
216
+ alpha = (work.x3 - work.x1) / (work.x2 - work.x1)
217
+ j = ((1 - xp.sqrt(1 - xi1)) < phi1) & (phi1 < xp.sqrt(xi1))
218
+
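+ # Where `j` holds, inverse quadratic interpolation is considered safe;
+ # elsewhere `t` keeps its bisection default of 0.5 (set just below).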
219
+ f1j, f2j, f3j, alphaj = work.f1[j], work.f2[j], work.f3[j], alpha[j]
220
+ t = xp.full_like(alpha, 0.5)
221
+ t[j] = (f1j / (f1j - f2j) * f3j / (f3j - f2j)
222
+ - alphaj * f1j / (f3j - f1j) * f2j / (f2j - f3j))
223
+
224
+ # [1] Figure 1 (last box; see also BASIC in appendix with comment
225
+ # "Adjust T Away from the Interval Boundary")
226
+ tl = 0.5 * work.tol / work.dx
227
+ work.t = xp_clip(t, tl, 1 - tl)
228
+
229
+ def customize_result(res, shape):
230
+ xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
231
+ i = res['xl'] < res['xr']
232
+ res['xl'] = xp.where(i, xl, xr)
233
+ res['xr'] = xp.where(i, xr, xl)
234
+ res['fl'] = xp.where(i, fl, fr)
235
+ res['fr'] = xp.where(i, fr, fl)
236
+ return shape
237
+
238
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
239
+ pre_func_eval, post_func_eval, check_termination,
240
+ post_termination_check, customize_result, res_work_pairs,
241
+ xp=xp)
242
+
243
+
244
+ def _chandrupatla_iv(func, args, xatol, xrtol,
245
+ fatol, frtol, maxiter, callback):
246
+ # Input validation for `_chandrupatla`
247
+
248
+ if not callable(func):
249
+ raise ValueError('`func` must be callable.')
250
+
251
+ if not np.iterable(args):
252
+ args = (args,)
253
+
254
+ # tolerances are floats, not arrays; OK to use NumPy
255
+ tols = np.asarray([xatol if xatol is not None else 1,
256
+ xrtol if xrtol is not None else 1,
257
+ fatol if fatol is not None else 1,
258
+ frtol if frtol is not None else 1])
259
+ if (not np.issubdtype(tols.dtype, np.number) or np.any(tols < 0)
260
+ or np.any(np.isnan(tols)) or tols.shape != (4,)):
261
+ raise ValueError('Tolerances must be non-negative scalars.')
262
+
263
+ if maxiter is not None:
264
+ maxiter_int = int(maxiter)
265
+ if maxiter != maxiter_int or maxiter < 0:
266
+ raise ValueError('`maxiter` must be a non-negative integer.')
267
+
268
+ if callback is not None and not callable(callback):
269
+ raise ValueError('`callback` must be callable.')
270
+
271
+ return func, args, xatol, xrtol, fatol, frtol, maxiter, callback
272
+
273
+
274
+ def _chandrupatla_minimize(func, x1, x2, x3, *, args=(), xatol=None,
275
+ xrtol=None, fatol=None, frtol=None, maxiter=100,
276
+ callback=None):
277
+ """Find the minimizer of an elementwise function.
278
+
279
+ For each element of the output of `func`, `_chandrupatla_minimize` seeks
280
+ the scalar minimizer that minimizes the element. This function allows for
281
+ `x1`, `x2`, `x3`, and the elements of `args` to be arrays of any
282
+ broadcastable shapes.
283
+
284
+ Parameters
285
+ ----------
286
+ func : callable
287
+ The function whose minimizer is desired. The signature must be::
288
+
289
+ func(x: ndarray, *args) -> ndarray
290
+
291
+ where each element of ``x`` is a finite real and ``args`` is a tuple,
292
+ which may contain an arbitrary number of arrays that are broadcastable
293
+ with `x`. ``func`` must be an elementwise function: each element
294
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
295
+ `_chandrupatla_minimize` seeks an array ``x`` such that ``func(x)`` is an array
296
+ of minima.
297
+ x1, x2, x3 : array_like
298
+ The abscissae of a standard scalar minimization bracket. A bracket is
299
+ valid if ``x1 < x2 < x3`` and ``func(x1) > func(x2) <= func(x3)``.
300
+ Must be broadcastable with one another and `args`.
301
+ args : tuple, optional
302
+ Additional positional arguments to be passed to `func`. Must be arrays
303
+ broadcastable with `x1`, `x2`, and `x3`. If the callable to be
304
+ minimized requires arguments that are not broadcastable with `x`,
305
+ wrap that callable with `func` such that `func` accepts only `x` and
306
+ broadcastable arrays.
307
+ xatol, xrtol, fatol, frtol : float, optional
308
+ Absolute and relative tolerances on the minimizer and function value.
309
+ See Notes for details.
310
+ maxiter : int, optional
311
+ The maximum number of iterations of the algorithm to perform.
312
+ callback : callable, optional
313
+ An optional user-supplied function to be called before the first
314
+ iteration and after each iteration.
315
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
316
+ similar to that returned by `_chandrupatla_minimize` (but containing
317
+ the current iterate's values of all variables). If `callback` raises a
318
+ ``StopIteration``, the algorithm will terminate immediately and
319
+ `_chandrupatla_minimize` will return a result.
320
+
321
+ Returns
322
+ -------
323
+ res : _RichResult
324
+ An instance of `scipy._lib._util._RichResult` with the following
325
+ attributes. (The descriptions are written as though the values will be
326
+ scalars; however, if `func` returns an array, the outputs will be
327
+ arrays of the same shape.)
328
+
329
+ success : bool
330
+ ``True`` when the algorithm terminated successfully (status ``0``).
331
+ status : int
332
+ An integer representing the exit status of the algorithm.
333
+ ``0`` : The algorithm converged to the specified tolerances.
334
+ ``-1`` : The algorithm encountered an invalid bracket.
335
+ ``-2`` : The maximum number of iterations was reached.
336
+ ``-3`` : A non-finite value was encountered.
337
+ ``-4`` : Iteration was terminated by `callback`.
338
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
339
+ x : float
340
+ The minimizer of the function, if the algorithm terminated
341
+ successfully.
342
+ fun : float
343
+ The value of `func` evaluated at `x`.
344
+ nfev : int
345
+ The number of points at which `func` was evaluated.
346
+ nit : int
347
+ The number of iterations of the algorithm that were performed.
348
+ xl, xm, xr : float
349
+ The final three-point bracket.
350
+ fl, fm, fr : float
351
+ The function value at the bracket points.
352
+
353
+ Notes
354
+ -----
355
+ Implemented based on Chandrupatla's original paper [1]_.
356
+
357
+ If ``x1 < x2 < x3`` are the points of the bracket and ``f1 > f2 <= f3``
358
+ are the values of ``func`` at those points, then the algorithm is
359
+ considered to have converged when ``x3 - x1 <= abs(x2)*xrtol + xatol``
360
+ or ``(f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol``. Note that first of
361
+ these differs from the termination conditions described in [1]_. The
362
+ default values of `xrtol` is the square root of the precision of the
363
+ appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal
364
+ number of the appropriate dtype.
365
+
366
+ References
367
+ ----------
368
+ .. [1] Chandrupatla, Tirupathi R. (1998).
369
+ "An efficient quadratic fit-sectioning algorithm for minimization
370
+ without derivatives".
371
+ Computer Methods in Applied Mechanics and Engineering, 152 (1-2),
372
+ 211-217. https://doi.org/10.1016/S0045-7825(97)00190-4
373
+
374
+ See Also
375
+ --------
376
+ golden, brent, bounded
377
+
378
+ Examples
379
+ --------
380
+ >>> from scipy.optimize._chandrupatla import _chandrupatla_minimize
381
+ >>> def f(x, args=1):
382
+ ... return (x - args)**2
383
+ >>> res = _chandrupatla_minimize(f, -5, 0, 5)
384
+ >>> res.x
385
+ 1.0
386
+ >>> c = [1, 1.5, 2]
387
+ >>> res = _chandrupatla_minimize(f, -5, 0, 5, args=(c,))
388
+ >>> res.x
389
+ array([1. , 1.5, 2. ])
390
+ """
391
+ res = _chandrupatla_iv(func, args, xatol, xrtol,
392
+ fatol, frtol, maxiter, callback)
393
+ func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res
394
+
395
+ # Initialization
396
+ xs = (x1, x2, x3)
397
+ temp = eim._initialize(func, xs, args)
398
+ func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
399
+ x1, x2, x3 = xs
400
+ f1, f2, f3 = fs
401
+ phi = dtype.type(0.5 + 0.5*5**0.5) # golden ratio
402
+ status = np.full_like(x1, eim._EINPROGRESS, dtype=int) # in progress
403
+ nit, nfev = 0, 3 # three function evaluations performed above
404
+ fatol = np.finfo(dtype).tiny if fatol is None else fatol
405
+ frtol = np.finfo(dtype).tiny if frtol is None else frtol
406
+ xatol = np.finfo(dtype).tiny if xatol is None else xatol
407
+ xrtol = np.sqrt(np.finfo(dtype).eps) if xrtol is None else xrtol
408
+
409
+ # Ensure that x1 < x2 < x3 initially.
410
+ xs, fs = np.vstack((x1, x2, x3)), np.vstack((f1, f2, f3))
411
+ i = np.argsort(xs, axis=0)
412
+ x1, x2, x3 = np.take_along_axis(xs, i, axis=0)
413
+ f1, f2, f3 = np.take_along_axis(fs, i, axis=0)
414
+ q0 = x3.copy() # "At the start, q0 is set at x3..." ([1] after (7))
415
+
416
+ work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=x3, f3=f3, phi=phi,
417
+ xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
418
+ nit=nit, nfev=nfev, status=status, q0=q0, args=args)
419
+ res_work_pairs = [('status', 'status'),
420
+ ('x', 'x2'), ('fun', 'f2'),
421
+ ('nit', 'nit'), ('nfev', 'nfev'),
422
+ ('xl', 'x1'), ('xm', 'x2'), ('xr', 'x3'),
423
+ ('fl', 'f1'), ('fm', 'f2'), ('fr', 'f3')]
424
+
425
+ def pre_func_eval(work):
426
+ # `_check_termination` is called first -> `x3 - x2 > x2 - x1`
427
+ # But let's calculate a few terms that we'll reuse
428
+ x21 = work.x2 - work.x1
429
+ x32 = work.x3 - work.x2
430
+
431
+ # [1] Section 3. "The quadratic minimum point Q1 is calculated using
432
+ # the relations developed in the previous section." [1] Section 2 (5/6)
433
+ A = x21 * (work.f3 - work.f2)
434
+ B = x32 * (work.f1 - work.f2)
435
+ C = A / (A + B)
436
+ # q1 = C * (work.x1 + work.x2) / 2 + (1 - C) * (work.x2 + work.x3) / 2
437
+ q1 = 0.5 * (C*(work.x1 - work.x3) + work.x2 + work.x3) # much faster
438
+ # this is an array, so multiplying by 0.5 does not change dtype
439
+
440
+ # "If Q1 and Q0 are sufficiently close... Q1 is accepted if it is
441
+ # sufficiently away from the inside point x2"
442
+ i = abs(q1 - work.q0) < 0.5 * abs(x21) # [1] (7)
443
+ xi = q1[i]
444
+ # Later, after (9), "If the point Q1 is in a +/- xtol neighborhood of
445
+ # x2, the new point is chosen in the larger interval at a distance
446
+ # tol away from x2."
447
+ # See also QBASIC code after "Accept Ql adjust if close to X2".
448
+ j = abs(q1[i] - work.x2[i]) <= work.xtol[i]
449
+ xi[j] = work.x2[i][j] + np.sign(x32[i][j]) * work.xtol[i][j]
450
+
451
+ # "If condition (7) is not satisfied, golden sectioning of the larger
452
+ # interval is carried out to introduce the new point."
453
+ # (For simplicity, we go ahead and calculate it for all points, but we
454
+ # change the elements for which the condition was satisfied.)
455
+ x = work.x2 + (2 - work.phi) * x32
456
+ x[i] = xi
457
+
458
+ # "We define Q0 as the value of Q1 at the previous iteration."
459
+ work.q0 = q1
460
+ return x
461
+
462
+ def post_func_eval(x, f, work):
463
+ # Standard logic for updating a three-point bracket based on a new
464
+ # point. In QBASIC code, see "IF SGN(X-X2) = SGN(X3-X2) THEN...".
465
+ # There is an awful lot of data copying going on here; this would
466
+ # probably benefit from code optimization or implementation in Pythran.
467
+ i = np.sign(x - work.x2) == np.sign(work.x3 - work.x2)
468
+ xi, x1i, x2i, x3i = x[i], work.x1[i], work.x2[i], work.x3[i],
469
+ fi, f1i, f2i, f3i = f[i], work.f1[i], work.f2[i], work.f3[i]
470
+ j = fi > f2i
471
+ x3i[j], f3i[j] = xi[j], fi[j]
472
+ j = ~j
473
+ x1i[j], f1i[j], x2i[j], f2i[j] = x2i[j], f2i[j], xi[j], fi[j]
474
+
475
+ ni = ~i
476
+ xni, x1ni, x2ni, x3ni = x[ni], work.x1[ni], work.x2[ni], work.x3[ni],
477
+ fni, f1ni, f2ni, f3ni = f[ni], work.f1[ni], work.f2[ni], work.f3[ni]
478
+ j = fni > f2ni
479
+ x1ni[j], f1ni[j] = xni[j], fni[j]
480
+ j = ~j
481
+ x3ni[j], f3ni[j], x2ni[j], f2ni[j] = x2ni[j], f2ni[j], xni[j], fni[j]
482
+
483
+ work.x1[i], work.x2[i], work.x3[i] = x1i, x2i, x3i
484
+ work.f1[i], work.f2[i], work.f3[i] = f1i, f2i, f3i
485
+ work.x1[ni], work.x2[ni], work.x3[ni] = x1ni, x2ni, x3ni,
486
+ work.f1[ni], work.f2[ni], work.f3[ni] = f1ni, f2ni, f3ni
487
+
488
+ def check_termination(work):
489
+ # Check for all terminal conditions and record statuses.
490
+ stop = np.zeros_like(work.x1, dtype=bool) # termination condition met
491
+
492
+ # Bracket is invalid; stop and don't return minimizer/minimum
493
+ i = ((work.f2 > work.f1) | (work.f2 > work.f3))
494
+ work.x2[i], work.f2[i] = np.nan, np.nan
495
+ stop[i], work.status[i] = True, eim._ESIGNERR
496
+
497
+ # Non-finite values; stop and don't return minimizer/minimum
498
+ finite = np.isfinite(work.x1+work.x2+work.x3+work.f1+work.f2+work.f3)
499
+ i = ~(finite | stop)
500
+ work.x2[i], work.f2[i] = np.nan, np.nan
501
+ stop[i], work.status[i] = True, eim._EVALUEERR
502
+
503
+ # [1] Section 3 "Points 1 and 3 are interchanged if necessary to make
504
+ # the (x2, x3) the larger interval."
505
+ # Note: I had used np.choose; this is much faster. This would be a good
506
+ # place to save e.g. `work.x3 - work.x2` for reuse, but I tried and
507
+ # didn't notice a speed boost, so let's keep it simple.
508
+ i = abs(work.x3 - work.x2) < abs(work.x2 - work.x1)
509
+ temp = work.x1[i]
510
+ work.x1[i] = work.x3[i]
511
+ work.x3[i] = temp
512
+ temp = work.f1[i]
513
+ work.f1[i] = work.f3[i]
514
+ work.f3[i] = temp
515
+
516
+ # [1] Section 3 (bottom of page 212)
517
+ # "We set a tolerance value xtol..."
518
+ work.xtol = abs(work.x2) * work.xrtol + work.xatol # [1] (8)
519
+ # "The convergence based on interval is achieved when..."
520
+ # Note: Equality allowed in case of `xtol=0`
521
+ i = abs(work.x3 - work.x2) <= 2 * work.xtol # [1] (9)
522
+
523
+ # "We define ftol using..."
524
+ ftol = abs(work.f2) * work.frtol + work.fatol # [1] (10)
525
+ # "The convergence based on function values is achieved when..."
526
+ # Note 1: modify in place to incorporate tolerance on function value.
527
+ # Note 2: factor of 2 is not in the text; see QBASIC start of DO loop
528
+ i |= (work.f1 - 2 * work.f2 + work.f3) <= 2*ftol # [1] (11)
529
+ i &= ~stop
530
+ stop[i], work.status[i] = True, eim._ECONVERGED
531
+
532
+ return stop
533
+
534
+ def post_termination_check(work):
535
+ pass
536
+
537
+ def customize_result(res, shape):
538
+ xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
539
+ i = res['xl'] < res['xr']
540
+ res['xl'] = np.choose(i, (xr, xl))
541
+ res['xr'] = np.choose(i, (xl, xr))
542
+ res['fl'] = np.choose(i, (fr, fl))
543
+ res['fr'] = np.choose(i, (fl, fr))
544
+ return shape
545
+
546
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
547
+ pre_func_eval, post_func_eval, check_termination,
548
+ post_termination_check, customize_result, res_work_pairs,
549
+ xp=xp)
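For intuition, the two stopping tests described in the Notes above can be evaluated by hand for a sample bracket. A minimal sketch, assuming plain NumPy; the tolerance defaults mirror the initialization code and are not part of any public API:

>>> import numpy as np
>>> x1, x2, x3 = 0.9, 1.0, 1.1           # bracket with x1 < x2 < x3
>>> f1, f2, f3 = 0.01, 0.0, 0.01         # f1 > f2 <= f3
>>> xrtol = np.sqrt(np.finfo(np.float64).eps)
>>> xatol = fatol = frtol = np.finfo(np.float64).tiny
>>> bool((x3 - x1) <= abs(x2)*xrtol + xatol)             # interval test
False
>>> bool((f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol)    # function-value test
False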
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyla_py.py ADDED
@@ -0,0 +1,316 @@
1
+ """
2
+ Interface to Constrained Optimization By Linear Approximation
3
+
4
+ Functions
5
+ ---------
6
+ .. autosummary::
7
+ :toctree: generated/
8
+
9
+ fmin_cobyla
10
+
11
+ """
12
+
13
+ import functools
14
+ from threading import RLock
15
+
16
+ import numpy as np
17
+ from scipy.optimize import _cobyla as cobyla
18
+ from ._optimize import (OptimizeResult, _check_unknown_options,
19
+ _prepare_scalar_function)
20
+ try:
21
+ from itertools import izip
22
+ except ImportError:
23
+ izip = zip
24
+
25
+ __all__ = ['fmin_cobyla']
26
+
27
+ # Workaround as _cobyla.minimize is not threadsafe
28
+ # due to an unknown f2py bug and can segfault,
29
+ # see gh-9658.
30
+ _module_lock = RLock()
31
+ def synchronized(func):
32
+ @functools.wraps(func)
33
+ def wrapper(*args, **kwargs):
34
+ with _module_lock:
35
+ return func(*args, **kwargs)
36
+ return wrapper
37
+
38
+ @synchronized
39
+ def fmin_cobyla(func, x0, cons, args=(), consargs=None, rhobeg=1.0,
40
+ rhoend=1e-4, maxfun=1000, disp=None, catol=2e-4,
41
+ *, callback=None):
42
+ """
43
+ Minimize a function using the Constrained Optimization By Linear
44
+ Approximation (COBYLA) method. This method wraps a FORTRAN
45
+ implementation of the algorithm.
46
+
47
+ Parameters
48
+ ----------
49
+ func : callable
50
+ Function to minimize. In the form func(x, \\*args).
51
+ x0 : ndarray
52
+ Initial guess.
53
+ cons : sequence
54
+ Constraint functions; must all be ``>=0`` (a single function
55
+ if only 1 constraint). Each function takes the parameters `x`
56
+ as its first argument, and it can return either a single number or
57
+ an array or list of numbers.
58
+ args : tuple, optional
59
+ Extra arguments to pass to function.
60
+ consargs : tuple, optional
61
+ Extra arguments to pass to constraint functions (default of None means
62
+ use same extra arguments as those passed to func).
63
+ Use ``()`` for no extra arguments.
64
+ rhobeg : float, optional
65
+ Reasonable initial changes to the variables.
66
+ rhoend : float, optional
67
+ Final accuracy in the optimization (not precisely guaranteed). This
68
+ is a lower bound on the size of the trust region.
69
+ disp : {0, 1, 2, 3}, optional
70
+ Controls the frequency of output; 0 implies no output.
71
+ maxfun : int, optional
72
+ Maximum number of function evaluations.
73
+ catol : float, optional
74
+ Absolute tolerance for constraint violations.
75
+ callback : callable, optional
76
+ Called after each iteration, as ``callback(x)``, where ``x`` is the
77
+ current parameter vector.
78
+
79
+ Returns
80
+ -------
81
+ x : ndarray
82
+ The argument that minimises `f`.
83
+
84
+ See also
85
+ --------
86
+ minimize: Interface to minimization algorithms for multivariate
87
+ functions. See the 'COBYLA' `method` in particular.
88
+
89
+ Notes
90
+ -----
91
+ This algorithm is based on linear approximations to the objective
92
+ function and each constraint. We briefly describe the algorithm.
93
+
94
+ Suppose the function is being minimized over k variables. At the
95
+ jth iteration the algorithm has k+1 points v_1, ..., v_(k+1),
96
+ an approximate solution x_j, and a radius RHO_j. The algorithm maintains
97
+ affine (i.e., linear plus a constant) approximations to the objective
98
+ function and constraint functions, chosen so that their values
99
+ agree with those of the true functions at the k+1 points v_1,.., v_(k+1).
100
+ This gives a linear program to solve (where the linear approximations
101
+ of the constraint functions are constrained to be non-negative).
102
+
103
+ However, the linear approximations are likely only good
104
+ approximations near the current simplex, so the linear program is
105
+ given the further requirement that the solution, which
106
+ will become x_(j+1), must be within RHO_j from x_j. RHO_j only
107
+ decreases, never increases. The initial RHO_j is rhobeg and the
108
+ final RHO_j is rhoend. In this way COBYLA's iterations behave
109
+ like a trust region algorithm.
110
+
111
+ Additionally, the linear program may be inconsistent, or the
112
+ approximation may give poor improvement. For details about
113
+ how these issues are resolved, as well as how the points v_i are
114
+ updated, refer to the source code or the references below.
115
+
116
+
117
+ References
118
+ ----------
119
+ Powell M.J.D. (1994), "A direct search optimization method that models
120
+ the objective and constraint functions by linear interpolation.", in
121
+ Advances in Optimization and Numerical Analysis, eds. S. Gomez and
122
+ J-P Hennart, Kluwer Academic (Dordrecht), pp. 51-67
123
+
124
+ Powell M.J.D. (1998), "Direct search algorithms for optimization
125
+ calculations", Acta Numerica 7, 287-336
126
+
127
+ Powell M.J.D. (2007), "A view of algorithms for optimization without
128
+ derivatives", Cambridge University Technical Report DAMTP 2007/NA03
129
+
130
+
131
+ Examples
132
+ --------
133
+ Minimize the objective function f(x,y) = x*y subject
134
+ to the constraints x**2 + y**2 < 1 and y > 0::
135
+
136
+ >>> def objective(x):
137
+ ... return x[0]*x[1]
138
+ ...
139
+ >>> def constr1(x):
140
+ ... return 1 - (x[0]**2 + x[1]**2)
141
+ ...
142
+ >>> def constr2(x):
143
+ ... return x[1]
144
+ ...
145
+ >>> from scipy.optimize import fmin_cobyla
146
+ >>> fmin_cobyla(objective, [0.0, 0.1], [constr1, constr2], rhoend=1e-7)
147
+ array([-0.70710685, 0.70710671])
148
+
149
+ The exact solution is (-sqrt(2)/2, sqrt(2)/2).
150
+
151
+
152
+
153
+ """
154
+ err = "cons must be a sequence of callable functions or a single"\
155
+ " callable function."
156
+ try:
157
+ len(cons)
158
+ except TypeError as e:
159
+ if callable(cons):
160
+ cons = [cons]
161
+ else:
162
+ raise TypeError(err) from e
163
+ else:
164
+ for thisfunc in cons:
165
+ if not callable(thisfunc):
166
+ raise TypeError(err)
167
+
168
+ if consargs is None:
169
+ consargs = args
170
+
171
+ # build constraints
172
+ con = tuple({'type': 'ineq', 'fun': c, 'args': consargs} for c in cons)
173
+
174
+ # options
175
+ opts = {'rhobeg': rhobeg,
176
+ 'tol': rhoend,
177
+ 'disp': disp,
178
+ 'maxiter': maxfun,
179
+ 'catol': catol,
180
+ 'callback': callback}
181
+
182
+ sol = _minimize_cobyla(func, x0, args, constraints=con,
183
+ **opts)
184
+ if disp and not sol['success']:
185
+ print(f"COBYLA failed to find a solution: {sol.message}")
186
+ return sol['x']
187
+
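The same problem can also be run through the `minimize` interface listed under See Also. A sketch, assuming the same objective and constraints as the docstring example above, so the result should match:

>>> from scipy.optimize import minimize
>>> res = minimize(lambda x: x[0]*x[1], [0.0, 0.1], method='COBYLA',
...                constraints=[{'type': 'ineq',
...                              'fun': lambda x: 1 - (x[0]**2 + x[1]**2)},
...                             {'type': 'ineq', 'fun': lambda x: x[1]}],
...                tol=1e-7)
>>> res.x
array([-0.70710685,  0.70710671])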
188
+
189
+ @synchronized
190
+ def _minimize_cobyla(fun, x0, args=(), constraints=(),
191
+ rhobeg=1.0, tol=1e-4, maxiter=1000,
192
+ disp=False, catol=2e-4, callback=None, bounds=None,
193
+ **unknown_options):
194
+ """
195
+ Minimize a scalar function of one or more variables using the
196
+ Constrained Optimization BY Linear Approximation (COBYLA) algorithm.
197
+
198
+ Options
199
+ -------
200
+ rhobeg : float
201
+ Reasonable initial changes to the variables.
202
+ tol : float
203
+ Final accuracy in the optimization (not precisely guaranteed).
204
+ This is a lower bound on the size of the trust region.
205
+ disp : bool
206
+ Set to True to print convergence messages. If False,
207
+ `verbosity` is ignored and set to 0.
208
+ maxiter : int
209
+ Maximum number of function evaluations.
210
+ catol : float
211
+ Tolerance (absolute) for constraint violations.
212
+
213
+ """
214
+ _check_unknown_options(unknown_options)
215
+ maxfun = maxiter
216
+ rhoend = tol
217
+ iprint = int(bool(disp))
218
+
219
+ # check constraints
220
+ if isinstance(constraints, dict):
221
+ constraints = (constraints, )
222
+
223
+ if bounds:
224
+ i_lb = np.isfinite(bounds.lb)
225
+ if np.any(i_lb):
226
+ def lb_constraint(x, *args, **kwargs):
227
+ return x[i_lb] - bounds.lb[i_lb]
228
+
229
+ constraints.append({'type': 'ineq', 'fun': lb_constraint})
230
+
231
+ i_ub = np.isfinite(bounds.ub)
232
+ if np.any(i_ub):
233
+ def ub_constraint(x):
234
+ return bounds.ub[i_ub] - x[i_ub]
235
+
236
+ constraints.append({'type': 'ineq', 'fun': ub_constraint})
237
+
238
+ for ic, con in enumerate(constraints):
239
+ # check type
240
+ try:
241
+ ctype = con['type'].lower()
242
+ except KeyError as e:
243
+ raise KeyError('Constraint %d has no type defined.' % ic) from e
244
+ except TypeError as e:
245
+ raise TypeError('Constraints must be defined using a '
246
+ 'dictionary.') from e
247
+ except AttributeError as e:
248
+ raise TypeError("Constraint's type must be a string.") from e
249
+ else:
250
+ if ctype != 'ineq':
251
+ raise ValueError("Constraints of type '%s' not handled by "
252
+ "COBYLA." % con['type'])
253
+
254
+ # check function
255
+ if 'fun' not in con:
256
+ raise KeyError('Constraint %d has no function defined.' % ic)
257
+
258
+ # check extra arguments
259
+ if 'args' not in con:
260
+ con['args'] = ()
261
+
262
+ # m is the total number of constraint values
263
+ # it takes into account that some constraints may be vector-valued
264
+ cons_lengths = []
265
+ for c in constraints:
266
+ f = c['fun'](x0, *c['args'])
267
+ try:
268
+ cons_length = len(f)
269
+ except TypeError:
270
+ cons_length = 1
271
+ cons_lengths.append(cons_length)
272
+ m = sum(cons_lengths)
273
+
274
+ # create the ScalarFunction, cobyla doesn't require derivative function
275
+ def _jac(x, *args):
276
+ return None
277
+
278
+ sf = _prepare_scalar_function(fun, x0, args=args, jac=_jac)
279
+
280
+ def calcfc(x, con):
281
+ f = sf.fun(x)
282
+ i = 0
283
+ for size, c in izip(cons_lengths, constraints):
284
+ con[i: i + size] = c['fun'](x, *c['args'])
285
+ i += size
286
+ return f
287
+
288
+ def wrapped_callback(x):
289
+ if callback is not None:
290
+ callback(np.copy(x))
291
+
292
+ info = np.zeros(4, np.float64)
293
+ xopt, info = cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
294
+ rhoend=rhoend, iprint=iprint, maxfun=maxfun,
295
+ dinfo=info, callback=wrapped_callback)
296
+
297
+ if info[3] > catol:
298
+ # Check constraint violation
299
+ info[0] = 4
300
+
301
+ return OptimizeResult(x=xopt,
302
+ status=int(info[0]),
303
+ success=info[0] == 1,
304
+ message={1: 'Optimization terminated successfully.',
305
+ 2: 'Maximum number of function evaluations '
306
+ 'has been exceeded.',
307
+ 3: 'Rounding errors are becoming damaging '
308
+ 'in COBYLA subroutine.',
309
+ 4: 'Did not converge to a solution '
310
+ 'satisfying the constraints. See '
311
+ '`maxcv` for magnitude of violation.',
312
+ 5: 'NaN result encountered.'
313
+ }.get(info[0], 'Unknown exit status.'),
314
+ nfev=int(info[1]),
315
+ fun=info[2],
316
+ maxcv=info[3])
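The `bounds` handling above turns each finite bound into an extra inequality constraint. A sketch of the slack those constraints compute, assuming plain NumPy and the public `Bounds` class with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import Bounds
>>> b = Bounds(lb=[0.0, -np.inf], ub=[np.inf, 2.0])
>>> i_lb, i_ub = np.isfinite(b.lb), np.isfinite(b.ub)
>>> x = np.array([0.5, 1.0])
>>> x[i_lb] - b.lb[i_lb]     # lb_constraint: nonnegative when feasible
array([0.5])
>>> b.ub[i_ub] - x[i_ub]     # ub_constraint: nonnegative when feasible
array([1.])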
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_cobyqa_py.py ADDED
@@ -0,0 +1,62 @@
1
+ import numpy as np
2
+
3
+ from ._optimize import _check_unknown_options
4
+
5
+
6
+ def _minimize_cobyqa(fun, x0, args=(), bounds=None, constraints=(),
7
+ callback=None, disp=False, maxfev=None, maxiter=None,
8
+ f_target=-np.inf, feasibility_tol=1e-8,
9
+ initial_tr_radius=1.0, final_tr_radius=1e-6, scale=False,
10
+ **unknown_options):
11
+ """
12
+ Minimize a scalar function of one or more variables using the
13
+ Constrained Optimization BY Quadratic Approximations (COBYQA) algorithm [1]_.
14
+
15
+ .. versionadded:: 1.14.0
16
+
17
+ Options
18
+ -------
19
+ disp : bool
20
+ Set to True to print information about the optimization procedure.
21
+ maxfev : int
22
+ Maximum number of function evaluations.
23
+ maxiter : int
24
+ Maximum number of iterations.
25
+ f_target : float
26
+ Target value for the objective function. The optimization procedure is
27
+ terminated when the objective function value of a feasible point (see
28
+ `feasibility_tol` below) is less than or equal to this target.
29
+ feasibility_tol : float
30
+ Absolute tolerance for the constraint violation.
31
+ initial_tr_radius : float
32
+ Initial trust-region radius. Typically, this value should be in the
33
+ order of one tenth of the greatest expected change to the variables.
34
+ final_tr_radius : float
35
+ Final trust-region radius. It should indicate the accuracy required in
36
+ the final values of the variables. If provided, this option overrides
37
+ the value of `tol` in the `minimize` function.
38
+ scale : bool
39
+ Set to True to scale the variables according to the bounds. If True and
40
+ if all the lower and upper bounds are finite, the variables are scaled
41
+ to be within the range :math:`[-1, 1]`. If any of the lower or upper
42
+ bounds is infinite, the variables are not scaled.
43
+
44
+ References
45
+ ----------
46
+ .. [1] COBYQA
47
+ https://www.cobyqa.com/stable/
48
+ """
49
+ from .._lib.cobyqa import minimize # import here to avoid circular imports
50
+
51
+ _check_unknown_options(unknown_options)
52
+ options = {
53
+ 'disp': bool(disp),
54
+ 'maxfev': int(maxfev) if maxfev is not None else 500 * len(x0),
55
+ 'maxiter': int(maxiter) if maxiter is not None else 1000 * len(x0),
56
+ 'target': float(f_target),
57
+ 'feasibility_tol': float(feasibility_tol),
58
+ 'radius_init': float(initial_tr_radius),
59
+ 'radius_final': float(final_tr_radius),
60
+ 'scale': bool(scale),
61
+ }
62
+ return minimize(fun, x0, args, bounds, constraints, callback, options)
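A minimal usage sketch for this wrapper, assuming it is dispatched via ``method='cobyqa'`` in `minimize` (available per the versionadded note; option names follow the docstring above):

>>> import numpy as np
>>> from scipy.optimize import minimize
>>> res = minimize(lambda x: (x[0] - 1)**2 + x[1]**2, x0=[0.0, 0.5],
...                method='cobyqa', options={'maxiter': 200})
>>> bool(np.linalg.norm(res.x - [1.0, 0.0]) < 1e-3)
True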
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_constraints.py ADDED
@@ -0,0 +1,590 @@
1
+ """Constraints definition for minimize."""
2
+ import numpy as np
3
+ from ._hessian_update_strategy import BFGS
4
+ from ._differentiable_functions import (
5
+ VectorFunction, LinearVectorFunction, IdentityVectorFunction)
6
+ from ._optimize import OptimizeWarning
7
+ from warnings import warn, catch_warnings, simplefilter, filterwarnings
8
+ from scipy.sparse import issparse
9
+
10
+
11
+ def _arr_to_scalar(x):
12
+ # If x is a numpy array, return x.item(). This will
13
+ # fail if the array has more than one element.
14
+ return x.item() if isinstance(x, np.ndarray) else x
15
+
16
+
17
+ class NonlinearConstraint:
18
+ """Nonlinear constraint on the variables.
19
+
20
+ The constraint has the general inequality form::
21
+
22
+ lb <= fun(x) <= ub
23
+
24
+ Here the vector of independent variables x is passed as ndarray of shape
25
+ (n,) and ``fun`` returns a vector with m components.
26
+
27
+ It is possible to use equal bounds to represent an equality constraint or
28
+ infinite bounds to represent a one-sided constraint.
29
+
30
+ Parameters
31
+ ----------
32
+ fun : callable
33
+ The function defining the constraint.
34
+ The signature is ``fun(x) -> array_like, shape (m,)``.
35
+ lb, ub : array_like
36
+ Lower and upper bounds on the constraint. Each array must have the
37
+ shape (m,) or be a scalar, in the latter case a bound will be the same
38
+ for all components of the constraint. Use ``np.inf`` with an
39
+ appropriate sign to specify a one-sided constraint.
40
+ Set components of `lb` and `ub` equal to represent an equality
41
+ constraint. Note that you can mix constraints of different types:
42
+ interval, one-sided or equality, by setting different components of
43
+ `lb` and `ub` as necessary.
44
+ jac : {callable, '2-point', '3-point', 'cs'}, optional
45
+ Method of computing the Jacobian matrix (an m-by-n matrix,
46
+ where element (i, j) is the partial derivative of f[i] with
47
+ respect to x[j]). The keywords {'2-point', '3-point',
48
+ 'cs'} select a finite difference scheme for the numerical estimation.
49
+ A callable must have the following signature:
50
+ ``jac(x) -> {ndarray, sparse matrix}, shape (m, n)``.
51
+ Default is '2-point'.
52
+ hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy, None}, optional
53
+ Method for computing the Hessian matrix. The keywords
54
+ {'2-point', '3-point', 'cs'} select a finite difference scheme for
55
+ numerical estimation. Alternatively, objects implementing
56
+ `HessianUpdateStrategy` interface can be used to approximate the
57
+ Hessian. Currently available implementations are:
58
+
59
+ - `BFGS` (default option)
60
+ - `SR1`
61
+
62
+ A callable must return the Hessian matrix of ``dot(fun, v)`` and
63
+ must have the following signature:
64
+ ``hess(x, v) -> {LinearOperator, sparse matrix, array_like}, shape (n, n)``.
65
+ Here ``v`` is ndarray with shape (m,) containing Lagrange multipliers.
66
+ keep_feasible : array_like of bool, optional
67
+ Whether to keep the constraint components feasible throughout
68
+ iterations. A single value sets this property for all components.
69
+ Default is False. Has no effect for equality constraints.
70
+ finite_diff_rel_step: None or array_like, optional
71
+ Relative step size for the finite difference approximation. Default is
72
+ None, which will select a reasonable value automatically depending
73
+ on a finite difference scheme.
74
+ finite_diff_jac_sparsity: {None, array_like, sparse matrix}, optional
75
+ Defines the sparsity structure of the Jacobian matrix for finite
76
+ difference estimation, its shape must be (m, n). If the Jacobian has
77
+ only few non-zero elements in *each* row, providing the sparsity
78
+ structure will greatly speed up the computations. A zero entry means
79
+ that a corresponding element in the Jacobian is identically zero.
80
+ If provided, forces the use of 'lsmr' trust-region solver.
81
+ If None (default) then dense differencing will be used.
82
+
83
+ Notes
84
+ -----
85
+ Finite difference schemes {'2-point', '3-point', 'cs'} may be used for
86
+ approximating either the Jacobian or the Hessian. We, however, do not allow
87
+ its use for approximating both simultaneously. Hence whenever the Jacobian
88
+ is estimated via finite-differences, we require the Hessian to be estimated
89
+ using one of the quasi-Newton strategies.
90
+
91
+ The scheme 'cs' is potentially the most accurate, but requires the function
92
+ to correctly handle complex inputs and be analytically continuable to the
93
+ complex plane. The scheme '3-point' is more accurate than '2-point' but
94
+ requires twice as many operations.
95
+
96
+ Examples
97
+ --------
98
+ Constrain ``x[0] < sin(x[1]) + 1.9``
99
+
100
+ >>> from scipy.optimize import NonlinearConstraint
101
+ >>> import numpy as np
102
+ >>> con = lambda x: x[0] - np.sin(x[1])
103
+ >>> nlc = NonlinearConstraint(con, -np.inf, 1.9)
104
+
105
+ """
106
+ def __init__(self, fun, lb, ub, jac='2-point', hess=BFGS(),
107
+ keep_feasible=False, finite_diff_rel_step=None,
108
+ finite_diff_jac_sparsity=None):
109
+ self.fun = fun
110
+ self.lb = lb
111
+ self.ub = ub
112
+ self.finite_diff_rel_step = finite_diff_rel_step
113
+ self.finite_diff_jac_sparsity = finite_diff_jac_sparsity
114
+ self.jac = jac
115
+ self.hess = hess
116
+ self.keep_feasible = keep_feasible
117
+
118
+
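The 'cs' scheme mentioned in the Notes above is the complex-step derivative, Im f(x + ih)/h. A standalone sketch of its accuracy, assuming plain NumPy and independent of this class:

>>> import numpy as np
>>> h = 1e-200
>>> d = np.imag(np.sin(1.0 + 1j*h)) / h   # derivative of sin at 1.0
>>> bool(np.allclose(d, np.cos(1.0)))
True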
119
+ class LinearConstraint:
120
+ """Linear constraint on the variables.
121
+
122
+ The constraint has the general inequality form::
123
+
124
+ lb <= A.dot(x) <= ub
125
+
126
+ Here the vector of independent variables x is passed as ndarray of shape
127
+ (n,) and the matrix A has shape (m, n).
128
+
129
+ It is possible to use equal bounds to represent an equality constraint or
130
+ infinite bounds to represent a one-sided constraint.
131
+
132
+ Parameters
133
+ ----------
134
+ A : {array_like, sparse matrix}, shape (m, n)
135
+ Matrix defining the constraint.
136
+ lb, ub : dense array_like, optional
137
+ Lower and upper limits on the constraint. Each array must have the
138
+ shape (m,) or be a scalar, in the latter case a bound will be the same
139
+ for all components of the constraint. Use ``np.inf`` with an
140
+ appropriate sign to specify a one-sided constraint.
141
+ Set components of `lb` and `ub` equal to represent an equality
142
+ constraint. Note that you can mix constraints of different types:
143
+ interval, one-sided or equality, by setting different components of
144
+ `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
145
+ and ``ub = np.inf`` (no limits).
146
+ keep_feasible : dense array_like of bool, optional
147
+ Whether to keep the constraint components feasible throughout
148
+ iterations. A single value sets this property for all components.
149
+ Default is False. Has no effect for equality constraints.
150
+ """
151
+ def _input_validation(self):
152
+ if self.A.ndim != 2:
153
+ message = "`A` must have exactly two dimensions."
154
+ raise ValueError(message)
155
+
156
+ try:
157
+ shape = self.A.shape[0:1]
158
+ self.lb = np.broadcast_to(self.lb, shape)
159
+ self.ub = np.broadcast_to(self.ub, shape)
160
+ self.keep_feasible = np.broadcast_to(self.keep_feasible, shape)
161
+ except ValueError:
162
+ message = ("`lb`, `ub`, and `keep_feasible` must be broadcastable "
163
+ "to shape `A.shape[0:1]`")
164
+ raise ValueError(message)
165
+
166
+ def __init__(self, A, lb=-np.inf, ub=np.inf, keep_feasible=False):
167
+ if not issparse(A):
168
+ # In some cases, if the constraint is not valid, this emits a
169
+ # VisibleDeprecationWarning about ragged nested sequences
170
+ # before eventually causing an error. `scipy.optimize.milp` would
171
+ # prefer that this just error out immediately so it can handle it
172
+ # rather than concerning the user.
173
+ with catch_warnings():
174
+ simplefilter("error")
175
+ self.A = np.atleast_2d(A).astype(np.float64)
176
+ else:
177
+ self.A = A
178
+ if issparse(lb) or issparse(ub):
179
+ raise ValueError("Constraint limits must be dense arrays.")
180
+ self.lb = np.atleast_1d(lb).astype(np.float64)
181
+ self.ub = np.atleast_1d(ub).astype(np.float64)
182
+
183
+ if issparse(keep_feasible):
184
+ raise ValueError("`keep_feasible` must be a dense array.")
185
+ self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
186
+ self._input_validation()
187
+
188
+ def residual(self, x):
189
+ """
190
+ Calculate the residual between the constraint function and the limits
191
+
192
+ For a linear constraint of the form::
193
+
194
+ lb <= A@x <= ub
195
+
196
+ the lower and upper residuals between ``A@x`` and the limits are values
197
+ ``sl`` and ``sb`` such that::
198
+
199
+ lb + sl == A@x == ub - sb
200
+
201
+ When all elements of ``sl`` and ``sb`` are positive, all elements of
202
+ the constraint are satisfied; a negative element in ``sl`` or ``sb``
203
+ indicates that the corresponding element of the constraint is not
204
+ satisfied.
205
+
206
+ Parameters
207
+ ----------
208
+ x: array_like
209
+ Vector of independent variables
210
+
211
+ Returns
212
+ -------
213
+ sl, sb : array-like
214
+ The lower and upper residuals
215
+ """
216
+ return self.A@x - self.lb, self.ub - self.A@x
217
+
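A sketch of `residual` for a single linear constraint ``0 <= x0 + x1 <= 4``, with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import LinearConstraint
>>> lc = LinearConstraint(np.array([[1.0, 1.0]]), 0.0, 4.0)
>>> lc.residual(np.array([1.0, 2.0]))   # both slacks nonnegative: feasible
(array([3.]), array([1.]))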
218
+
219
+ class Bounds:
220
+ """Bounds constraint on the variables.
221
+
222
+ The constraint has the general inequality form::
223
+
224
+ lb <= x <= ub
225
+
226
+ It is possible to use equal bounds to represent an equality constraint or
227
+ infinite bounds to represent a one-sided constraint.
228
+
229
+ Parameters
230
+ ----------
231
+ lb, ub : dense array_like, optional
232
+ Lower and upper bounds on independent variables. `lb`, `ub`, and
233
+ `keep_feasible` must be the same shape or broadcastable.
234
+ Set components of `lb` and `ub` equal
235
+ to fix a variable. Use ``np.inf`` with an appropriate sign to disable
236
+ bounds on all or some variables. Note that you can mix constraints of
237
+ different types: interval, one-sided or equality, by setting different
238
+ components of `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
239
+ and ``ub = np.inf`` (no bounds).
240
+ keep_feasible : dense array_like of bool, optional
241
+ Whether to keep the constraint components feasible throughout
242
+ iterations. Must be broadcastable with `lb` and `ub`.
243
+ Default is False. Has no effect for equality constraints.
244
+ """
245
+ def _input_validation(self):
246
+ try:
247
+ res = np.broadcast_arrays(self.lb, self.ub, self.keep_feasible)
248
+ self.lb, self.ub, self.keep_feasible = res
249
+ except ValueError:
250
+ message = "`lb`, `ub`, and `keep_feasible` must be broadcastable."
251
+ raise ValueError(message)
252
+
253
+ def __init__(self, lb=-np.inf, ub=np.inf, keep_feasible=False):
254
+ if issparse(lb) or issparse(ub):
255
+ raise ValueError("Lower and upper bounds must be dense arrays.")
256
+ self.lb = np.atleast_1d(lb)
257
+ self.ub = np.atleast_1d(ub)
258
+
259
+ if issparse(keep_feasible):
260
+ raise ValueError("`keep_feasible` must be a dense array.")
261
+ self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
262
+ self._input_validation()
263
+
264
+ def __repr__(self):
265
+ start = f"{type(self).__name__}({self.lb!r}, {self.ub!r}"
266
+ if np.any(self.keep_feasible):
267
+ end = f", keep_feasible={self.keep_feasible!r})"
268
+ else:
269
+ end = ")"
270
+ return start + end
271
+
272
+ def residual(self, x):
273
+ """Calculate the residual (slack) between the input and the bounds
274
+
275
+ For a bound constraint of the form::
276
+
277
+ lb <= x <= ub
278
+
279
+ the lower and upper residuals between `x` and the bounds are values
280
+ ``sl`` and ``sb`` such that::
281
+
282
+ lb + sl == x == ub - sb
283
+
284
+ When all elements of ``sl`` and ``sb`` are positive, all elements of
285
+ ``x`` lie within the bounds; a negative element in ``sl`` or ``sb``
286
+ indicates that the corresponding element of ``x`` is out of bounds.
287
+
288
+ Parameters
289
+ ----------
290
+ x: array_like
291
+ Vector of independent variables
292
+
293
+ Returns
294
+ -------
295
+ sl, sb : array-like
296
+ The lower and upper residuals
297
+ """
298
+ return x - self.lb, self.ub - x
299
+
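The same slack convention applies to `Bounds.residual`; a sketch with illustrative values:

>>> import numpy as np
>>> from scipy.optimize import Bounds
>>> b = Bounds([0.0, 0.0], [1.0, 1.0])
>>> b.residual(np.array([0.25, 0.75]))  # x within bounds: all slacks >= 0
(array([0.25, 0.75]), array([0.75, 0.25]))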
300
+
301
+ class PreparedConstraint:
302
+ """Constraint prepared from a user defined constraint.
303
+
304
+ On creation it will check whether a constraint definition is valid and
305
+ the initial point is feasible. If created successfully, it will contain
306
+ the attributes listed below.
307
+
308
+ Parameters
309
+ ----------
310
+ constraint : {NonlinearConstraint, LinearConstraint`, Bounds}
311
+ Constraint to check and prepare.
312
+ x0 : array_like
313
+ Initial vector of independent variables.
314
+ sparse_jacobian : bool or None, optional
315
+ If bool, then the Jacobian of the constraint will be converted
316
+ to the corresponding format if necessary. If None (default), such
317
+ conversion is not made.
318
+ finite_diff_bounds : 2-tuple, optional
319
+ Lower and upper bounds on the independent variables for the finite
320
+ difference approximation, if applicable. Defaults to no bounds.
321
+
322
+ Attributes
323
+ ----------
324
+ fun : {VectorFunction, LinearVectorFunction, IdentityVectorFunction}
325
+ Function defining the constraint wrapped by one of the convenience
326
+ classes.
327
+ bounds : 2-tuple
328
+ Contains lower and upper bounds for the constraints --- lb and ub.
329
+ These are converted to ndarray and have a size equal to the number of
330
+ the constraints.
331
+ keep_feasible : ndarray
332
+ Array indicating which components must be kept feasible with a size
333
+ equal to the number of the constraints.
334
+ """
335
+ def __init__(self, constraint, x0, sparse_jacobian=None,
336
+ finite_diff_bounds=(-np.inf, np.inf)):
337
+ if isinstance(constraint, NonlinearConstraint):
338
+ fun = VectorFunction(constraint.fun, x0,
339
+ constraint.jac, constraint.hess,
340
+ constraint.finite_diff_rel_step,
341
+ constraint.finite_diff_jac_sparsity,
342
+ finite_diff_bounds, sparse_jacobian)
343
+ elif isinstance(constraint, LinearConstraint):
344
+ fun = LinearVectorFunction(constraint.A, x0, sparse_jacobian)
345
+ elif isinstance(constraint, Bounds):
346
+ fun = IdentityVectorFunction(x0, sparse_jacobian)
347
+ else:
348
+ raise ValueError("`constraint` of an unknown type is passed.")
349
+
350
+ m = fun.m
351
+
352
+ lb = np.asarray(constraint.lb, dtype=float)
353
+ ub = np.asarray(constraint.ub, dtype=float)
354
+ keep_feasible = np.asarray(constraint.keep_feasible, dtype=bool)
355
+
356
+ lb = np.broadcast_to(lb, m)
357
+ ub = np.broadcast_to(ub, m)
358
+ keep_feasible = np.broadcast_to(keep_feasible, m)
359
+
360
+ if keep_feasible.shape != (m,):
361
+ raise ValueError("`keep_feasible` has a wrong shape.")
362
+
363
+ mask = keep_feasible & (lb != ub)
364
+ f0 = fun.f
365
+ if np.any(f0[mask] < lb[mask]) or np.any(f0[mask] > ub[mask]):
366
+ raise ValueError("`x0` is infeasible with respect to some "
367
+ "inequality constraint with `keep_feasible` "
368
+ "set to True.")
369
+
370
+ self.fun = fun
371
+ self.bounds = (lb, ub)
372
+ self.keep_feasible = keep_feasible
373
+
374
+ def violation(self, x):
375
+ """How much the constraint is exceeded by.
376
+
377
+ Parameters
378
+ ----------
379
+ x : array-like
380
+ Vector of independent variables
381
+
382
+ Returns
383
+ -------
384
+ excess : array-like
385
+ How much the constraint is exceeded by, for each of the
386
+ constraints specified by `PreparedConstraint.fun`.
387
+ """
388
+ with catch_warnings():
389
+ # Ignore the following warning, it's not important when
390
+ # figuring out total violation
391
+ # UserWarning: delta_grad == 0.0. Check if the approximated
392
+ # function is linear
393
+ filterwarnings("ignore", "delta_grad", UserWarning)
394
+ ev = self.fun.fun(np.asarray(x))
395
+
396
+ excess_lb = np.maximum(self.bounds[0] - ev, 0)
397
+ excess_ub = np.maximum(ev - self.bounds[1], 0)
398
+
399
+ return excess_lb + excess_ub
400
+
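A sketch of `violation`, assuming the private class is imported from this module (`scipy.optimize._constraints`) and using illustrative values:

>>> import numpy as np
>>> from scipy.optimize import LinearConstraint
>>> from scipy.optimize._constraints import PreparedConstraint
>>> lc = LinearConstraint(np.array([[1.0, 1.0]]), 0.0, 4.0)
>>> pc = PreparedConstraint(lc, np.array([1.0, 2.0]))
>>> pc.violation(np.array([5.0, 5.0]))  # A@x = 10 exceeds ub = 4 by 6
array([6.])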
401
+
402
+ def new_bounds_to_old(lb, ub, n):
403
+ """Convert the new bounds representation to the old one.
404
+
405
+ The new representation is a tuple (lb, ub) and the old one is a list
406
+ containing n tuples, the ith of which holds the lower and upper bound on
407
+ the ith variable.
408
+ If any of the entries in lb/ub are -np.inf/np.inf they are replaced by
409
+ None.
410
+ """
411
+ lb = np.broadcast_to(lb, n)
412
+ ub = np.broadcast_to(ub, n)
413
+
414
+ lb = [float(x) if x > -np.inf else None for x in lb]
415
+ ub = [float(x) if x < np.inf else None for x in ub]
416
+
417
+ return list(zip(lb, ub))
418
+
419
+
420
+ def old_bound_to_new(bounds):
421
+ """Convert the old bounds representation to the new one.
422
+
423
+ The new representation is a tuple (lb, ub) and the old one is a list
424
+ containing n tuples, the ith of which holds the lower and upper bound on
425
+ the ith variable.
426
+ If any of the entries in lb/ub are None they are replaced by
427
+ -np.inf/np.inf.
428
+ """
429
+ lb, ub = zip(*bounds)
430
+
431
+ # Convert occurrences of None to -inf or inf, and replace occurrences of
432
+ # any numpy array x with x.item(). Then wrap the results in numpy arrays.
433
+ lb = np.array([float(_arr_to_scalar(x)) if x is not None else -np.inf
434
+ for x in lb])
435
+ ub = np.array([float(_arr_to_scalar(x)) if x is not None else np.inf
436
+ for x in ub])
437
+
438
+ return lb, ub
439
+
440
+
441
+ def strict_bounds(lb, ub, keep_feasible, n_vars):
442
+ """Remove bounds which are not asked to be kept feasible."""
443
+ strict_lb = np.resize(lb, n_vars).astype(float)
444
+ strict_ub = np.resize(ub, n_vars).astype(float)
445
+ keep_feasible = np.resize(keep_feasible, n_vars)
446
+ strict_lb[~keep_feasible] = -np.inf
447
+ strict_ub[~keep_feasible] = np.inf
448
+ return strict_lb, strict_ub
449
+
450
+
451
+ def new_constraint_to_old(con, x0):
452
+ """
453
+ Converts new-style constraint objects to old-style constraint dictionaries.
454
+ """
455
+ if isinstance(con, NonlinearConstraint):
456
+ if (con.finite_diff_jac_sparsity is not None or
457
+ con.finite_diff_rel_step is not None or
458
+ not isinstance(con.hess, BFGS) or # misses user specified BFGS
459
+ con.keep_feasible):
460
+ warn("Constraint options `finite_diff_jac_sparsity`, "
461
+ "`finite_diff_rel_step`, `keep_feasible`, and `hess`"
462
+ "are ignored by this method.",
463
+ OptimizeWarning, stacklevel=3)
464
+
465
+ fun = con.fun
466
+ if callable(con.jac):
467
+ jac = con.jac
468
+ else:
469
+ jac = None
470
+
471
+ else: # LinearConstraint
472
+ if np.any(con.keep_feasible):
473
+ warn("Constraint option `keep_feasible` is ignored by this method.",
474
+ OptimizeWarning, stacklevel=3)
475
+
476
+ A = con.A
477
+ if issparse(A):
478
+ A = A.toarray()
479
+ def fun(x):
480
+ return np.dot(A, x)
481
+ def jac(x):
482
+ return A
483
+
484
+ # FIXME: when bugs in VectorFunction/LinearVectorFunction are worked out,
485
+ # use pcon.fun.fun and pcon.fun.jac. Until then, get fun/jac above.
486
+ pcon = PreparedConstraint(con, x0)
487
+ lb, ub = pcon.bounds
488
+
489
+ i_eq = lb == ub
490
+ i_bound_below = np.logical_xor(lb != -np.inf, i_eq)
491
+ i_bound_above = np.logical_xor(ub != np.inf, i_eq)
492
+ i_unbounded = np.logical_and(lb == -np.inf, ub == np.inf)
493
+
494
+ if np.any(i_unbounded):
495
+ warn("At least one constraint is unbounded above and below. Such "
496
+ "constraints are ignored.",
497
+ OptimizeWarning, stacklevel=3)
498
+
499
+ ceq = []
500
+ if np.any(i_eq):
501
+ def f_eq(x):
502
+ y = np.array(fun(x)).flatten()
503
+ return y[i_eq] - lb[i_eq]
504
+ ceq = [{"type": "eq", "fun": f_eq}]
505
+
506
+ if jac is not None:
507
+ def j_eq(x):
508
+ dy = jac(x)
509
+ if issparse(dy):
510
+ dy = dy.toarray()
511
+ dy = np.atleast_2d(dy)
512
+ return dy[i_eq, :]
513
+ ceq[0]["jac"] = j_eq
514
+
515
+ cineq = []
516
+ n_bound_below = np.sum(i_bound_below)
517
+ n_bound_above = np.sum(i_bound_above)
518
+ if n_bound_below + n_bound_above:
519
+ def f_ineq(x):
520
+ y = np.zeros(n_bound_below + n_bound_above)
521
+ y_all = np.array(fun(x)).flatten()
522
+ y[:n_bound_below] = y_all[i_bound_below] - lb[i_bound_below]
523
+ y[n_bound_below:] = -(y_all[i_bound_above] - ub[i_bound_above])
524
+ return y
525
+ cineq = [{"type": "ineq", "fun": f_ineq}]
526
+
527
+ if jac is not None:
528
+ def j_ineq(x):
529
+ dy = np.zeros((n_bound_below + n_bound_above, len(x0)))
530
+ dy_all = jac(x)
531
+ if issparse(dy_all):
532
+ dy_all = dy_all.toarray()
533
+ dy_all = np.atleast_2d(dy_all)
534
+ dy[:n_bound_below, :] = dy_all[i_bound_below]
535
+ dy[n_bound_below:, :] = -dy_all[i_bound_above]
536
+ return dy
537
+ cineq[0]["jac"] = j_ineq
538
+
539
+ old_constraints = ceq + cineq
540
+
541
+ if len(old_constraints) > 1:
542
+ warn("Equality and inequality constraints are specified in the same "
543
+ "element of the constraint list. For efficient use with this "
544
+ "method, equality and inequality constraints should be specified "
545
+ "in separate elements of the constraint list. ",
546
+ OptimizeWarning, stacklevel=3)
547
+ return old_constraints
548
+
549
+
550
+ def old_constraint_to_new(ic, con):
551
+ """
552
+ Converts old-style constraint dictionaries to new-style constraint objects.
553
+ """
554
+ # check type
555
+ try:
556
+ ctype = con['type'].lower()
557
+ except KeyError as e:
558
+ raise KeyError('Constraint %d has no type defined.' % ic) from e
559
+ except TypeError as e:
560
+ raise TypeError(
561
+ 'Constraints must be a sequence of dictionaries.'
562
+ ) from e
563
+ except AttributeError as e:
564
+ raise TypeError("Constraint's type must be a string.") from e
565
+ else:
566
+ if ctype not in ['eq', 'ineq']:
567
+ raise ValueError("Unknown constraint type '%s'." % con['type'])
568
+ if 'fun' not in con:
569
+ raise ValueError('Constraint %d has no function defined.' % ic)
570
+
571
+ lb = 0
572
+ if ctype == 'eq':
573
+ ub = 0
574
+ else:
575
+ ub = np.inf
576
+
577
+ jac = '2-point'
578
+ if 'args' in con:
579
+ args = con['args']
580
+ def fun(x):
581
+ return con["fun"](x, *args)
582
+ if 'jac' in con:
583
+ def jac(x):
584
+ return con["jac"](x, *args)
585
+ else:
586
+ fun = con['fun']
587
+ if 'jac' in con:
588
+ jac = con['jac']
589
+
590
+ return NonlinearConstraint(fun, lb, ub, jac)
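A round-trip sketch for the two bounds-representation helpers above, assuming a private-module import; exact array formatting may vary by NumPy version:

>>> import numpy as np
>>> from scipy.optimize._constraints import new_bounds_to_old, old_bound_to_new
>>> old = new_bounds_to_old(np.array([0.0, -np.inf]), np.array([np.inf, 1.0]), 2)
>>> old
[(0.0, None), (None, 1.0)]
>>> old_bound_to_new(old)
(array([  0., -inf]), array([inf,  1.]))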
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dcsrch.py ADDED
@@ -0,0 +1,728 @@
1
+ import numpy as np
2
+
3
+ """
4
+ # 2023 - ported from minpack2.dcsrch, dcstep (Fortran) to Python
5
+ c MINPACK-1 Project. June 1983.
6
+ c Argonne National Laboratory.
7
+ c Jorge J. More' and David J. Thuente.
8
+ c
9
+ c MINPACK-2 Project. November 1993.
10
+ c Argonne National Laboratory and University of Minnesota.
11
+ c Brett M. Averick, Richard G. Carter, and Jorge J. More'.
12
+ """
13
+
14
+ # NOTE this file was linted by black on first commit, and can be kept that way.
15
+
16
+
17
+ class DCSRCH:
18
+ """
19
+ Parameters
20
+ ----------
21
+ phi : callable phi(alpha)
22
+ Function at point `alpha`
23
+ derphi : callable phi'(alpha)
24
+ Objective function derivative. Returns a scalar.
25
+ ftol : float
26
+ A nonnegative tolerance for the sufficient decrease condition.
27
+ gtol : float
28
+ A nonnegative tolerance for the curvature condition.
29
+ xtol : float
30
+ A nonnegative relative tolerance for an acceptable step. The
31
+ subroutine exits with a warning if the relative difference between
32
+ sty and stx is less than xtol.
33
+ stpmin : float
34
+ A nonnegative lower bound for the step.
35
+ stpmax :
36
+ A nonnegative upper bound for the step.
37
+
38
+ Notes
39
+ -----
40
+
41
+ This subroutine finds a step that satisfies a sufficient
42
+ decrease condition and a curvature condition.
43
+
44
+ Each call of the subroutine updates an interval with
45
+ endpoints stx and sty. The interval is initially chosen
46
+ so that it contains a minimizer of the modified function
47
+
48
+ psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
49
+
50
+ If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
51
+ interval is chosen so that it contains a minimizer of f.
52
+
53
+ The algorithm is designed to find a step that satisfies
54
+ the sufficient decrease condition
55
+
56
+ f(stp) <= f(0) + ftol*stp*f'(0),
57
+
58
+ and the curvature condition
59
+
60
+ abs(f'(stp)) <= gtol*abs(f'(0)).
61
+
62
+ If ftol is less than gtol and if, for example, the function
63
+ is bounded below, then there is always a step which satisfies
64
+ both conditions.
65
+
66
+ If no step can be found that satisfies both conditions, then
67
+ the algorithm stops with a warning. In this case stp only
68
+ satisfies the sufficient decrease condition.
69
+
70
+ A typical invocation of dcsrch has the following outline:
71
+
72
+ Evaluate the function at stp = 0.0d0; store in f.
73
+ Evaluate the gradient at stp = 0.0d0; store in g.
74
+ Choose a starting step stp.
75
+
76
+ task = 'START'
77
+ 10 continue
78
+ call dcsrch(stp,f,g,ftol,gtol,xtol,task,stpmin,stpmax,
79
+ isave,dsave)
80
+ if (task .eq. 'FG') then
81
+ Evaluate the function and the gradient at stp
82
+ go to 10
83
+ end if
84
+
85
+ NOTE: The user must not alter work arrays between calls.
86
+
87
+ The subroutine statement is
88
+
89
+ subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
90
+ task,isave,dsave)
91
+ where
92
+
93
+ stp is a double precision variable.
94
+ On entry stp is the current estimate of a satisfactory
95
+ step. On initial entry, a positive initial estimate
96
+ must be provided.
97
+ On exit stp is the current estimate of a satisfactory step
98
+ if task = 'FG'. If task = 'CONV' then stp satisfies
99
+ the sufficient decrease and curvature condition.
100
+
101
+ f is a double precision variable.
102
+ On initial entry f is the value of the function at 0.
103
+ On subsequent entries f is the value of the
104
+ function at stp.
105
+ On exit f is the value of the function at stp.
106
+
107
+ g is a double precision variable.
108
+ On initial entry g is the derivative of the function at 0.
109
+ On subsequent entries g is the derivative of the
110
+ function at stp.
111
+ On exit g is the derivative of the function at stp.
112
+
113
+ ftol is a double precision variable.
114
+ On entry ftol specifies a nonnegative tolerance for the
115
+ sufficient decrease condition.
116
+ On exit ftol is unchanged.
117
+
118
+ gtol is a double precision variable.
119
+ On entry gtol specifies a nonnegative tolerance for the
120
+ curvature condition.
121
+ On exit gtol is unchanged.
122
+
123
+ xtol is a double precision variable.
124
+ On entry xtol specifies a nonnegative relative tolerance
125
+ for an acceptable step. The subroutine exits with a
126
+ warning if the relative difference between sty and stx
127
+ is less than xtol.
128
+
129
+ On exit xtol is unchanged.
130
+
131
+ task is a character variable of length at least 60.
132
+ On initial entry task must be set to 'START'.
133
+ On exit task indicates the required action:
134
+
135
+ If task(1:2) = 'FG' then evaluate the function and
136
+ derivative at stp and call dcsrch again.
137
+
138
+ If task(1:4) = 'CONV' then the search is successful.
139
+
140
+ If task(1:4) = 'WARN' then the subroutine is not able
141
+ to satisfy the convergence conditions. The exit value of
142
+ stp contains the best point found during the search.
143
+
144
+ If task(1:5) = 'ERROR' then there is an error in the
145
+ input arguments.
146
+
147
+ On exit with convergence, a warning or an error, the
148
+ variable task contains additional information.
149
+
150
+ stpmin is a double precision variable.
151
+ On entry stpmin is a nonnegative lower bound for the step.
152
+ On exit stpmin is unchanged.
153
+
154
+ stpmax is a double precision variable.
155
+ On entry stpmax is a nonnegative upper bound for the step.
156
+ On exit stpmax is unchanged.
157
+
158
+ isave is an integer work array of dimension 2.
159
+
160
+ dsave is a double precision work array of dimension 13.
161
+
162
+ Subprograms called
163
+
164
+ MINPACK-2 ... dcstep
165
+ MINPACK-1 Project. June 1983.
166
+ Argonne National Laboratory.
167
+ Jorge J. More' and David J. Thuente.
168
+
169
+ MINPACK-2 Project. November 1993.
170
+ Argonne National Laboratory and University of Minnesota.
171
+ Brett M. Averick, Richard G. Carter, and Jorge J. More'.
172
+ """
173
+
174
+ def __init__(self, phi, derphi, ftol, gtol, xtol, stpmin, stpmax):
175
+ self.stage = None
176
+ self.ginit = None
177
+ self.gtest = None
178
+ self.gx = None
179
+ self.gy = None
180
+ self.finit = None
181
+ self.fx = None
182
+ self.fy = None
183
+ self.stx = None
184
+ self.sty = None
185
+ self.stmin = None
186
+ self.stmax = None
187
+ self.width = None
188
+ self.width1 = None
189
+
190
+ # leave all assessment of tolerances/limits to the first call of
191
+ # this object
192
+ self.ftol = ftol
193
+ self.gtol = gtol
194
+ self.xtol = xtol
195
+ self.stpmin = stpmin
196
+ self.stpmax = stpmax
197
+
198
+ self.phi = phi
199
+ self.derphi = derphi
200
+
201
+ def __call__(self, alpha1, phi0=None, derphi0=None, maxiter=100):
202
+ """
203
+ Parameters
204
+ ----------
205
+ alpha1 : float
206
+ alpha1 is the current estimate of a satisfactory
207
+ step. A positive initial estimate must be provided.
208
+ phi0 : float
209
+ the value of `phi` at 0 (if known).
210
+ derphi0 : float
211
+ the derivative of `derphi` at 0 (if known).
212
+ maxiter : int
213
+
214
+ Returns
215
+ -------
216
+ alpha : float
217
+ Step size, or None if no suitable step was found.
218
+ phi : float
219
+ Value of `phi` at the new point `alpha`.
220
+ phi0 : float
221
+ Value of `phi` at `alpha=0`.
222
+ task : bytes
223
+ On exit task indicates status information.
224
+
225
+ If task[:4] == b'CONV' then the search is successful.
226
+
227
+ If task[:4] == b'WARN' then the subroutine is not able
228
+ to satisfy the convergence conditions. The exit value of
229
+ stp contains the best point found during the search.
230
+
231
+ If task[:5] == b'ERROR' then there is an error in the
232
+ input arguments.
233
+ """
234
+ if phi0 is None:
235
+ phi0 = self.phi(0.0)
236
+ if derphi0 is None:
237
+ derphi0 = self.derphi(0.0)
238
+
239
+ phi1 = phi0
240
+ derphi1 = derphi0
241
+
242
+ task = b"START"
243
+ for i in range(maxiter):
244
+ stp, phi1, derphi1, task = self._iterate(
245
+ alpha1, phi1, derphi1, task
246
+ )
247
+
248
+ if not np.isfinite(stp):
249
+ task = b"WARN"
250
+ stp = None
251
+ break
252
+
253
+ if task[:2] == b"FG":
254
+ alpha1 = stp
255
+ phi1 = self.phi(stp)
256
+ derphi1 = self.derphi(stp)
257
+ else:
258
+ break
259
+ else:
260
+ # maxiter reached, the line search did not converge
261
+ stp = None
262
+ task = b"WARNING: dcsrch did not converge within max iterations"
263
+
264
+ if task[:5] == b"ERROR" or task[:4] == b"WARN":
265
+ stp = None # failed
266
+
267
+ return stp, phi1, phi0, task
268
+
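A minimal end-to-end sketch of this class on phi(a) = (a - 1)**2, with illustrative parameter values; because gtol=0.9 makes the weak curvature condition hold already at the first trial step, the search stops there rather than at the exact minimizer:

>>> from scipy.optimize._dcsrch import DCSRCH
>>> phi = lambda a: (a - 1.0)**2
>>> derphi = lambda a: 2.0*(a - 1.0)
>>> search = DCSRCH(phi, derphi, ftol=1e-4, gtol=0.9, xtol=1e-14,
...                 stpmin=1e-12, stpmax=50.0)
>>> stp, phi1, phi0, task = search(alpha1=0.5)
>>> task[:4], stp
(b'CONV', 0.5)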
269
+ def _iterate(self, stp, f, g, task):
270
+ """
271
+ Parameters
272
+ ----------
273
+ stp : float
274
+ The current estimate of a satisfactory step. On initial entry, a
275
+ positive initial estimate must be provided.
276
+ f : float
277
+ On first call f is the value of the function at 0. On subsequent
278
+ entries f should be the value of the function at stp.
279
+ g : float
280
+ On initial entry g is the derivative of the function at 0. On
281
+ subsequent entries g is the derivative of the function at stp.
282
+ task : bytes
283
+ On initial entry task must be set to 'START'.
284
+
285
+ On exit with convergence, a warning or an error, the
+ variable task contains additional information.
+
+
+ Returns
+ -------
+ stp, f, g, task : tuple
+
+ stp : float
+ the current estimate of a satisfactory step if task = 'FG'. If
+ task = 'CONV' then stp satisfies the sufficient decrease and
+ curvature condition.
+ f : float
+ the value of the function at stp.
+ g : float
+ the derivative of the function at stp.
+ task : bytes
+ On exit task indicates the required action:
+
+ If task(1:2) == b'FG' then evaluate the function and
+ derivative at stp and call dcsrch again.
+
+ If task(1:4) == b'CONV' then the search is successful.
+
+ If task(1:4) == b'WARN' then the subroutine is not able
+ to satisfy the convergence conditions. The exit value of
+ stp contains the best point found during the search.
+
+ If task(1:5) == b'ERROR' then there is an error in the
+ input arguments.
+ """
+ p5 = 0.5
+ p66 = 0.66
+ xtrapl = 1.1
+ xtrapu = 4.0
+
+ if task[:5] == b"START":
+ if stp < self.stpmin:
+ task = b"ERROR: STP .LT. STPMIN"
+ if stp > self.stpmax:
+ task = b"ERROR: STP .GT. STPMAX"
+ if g >= 0:
+ task = b"ERROR: INITIAL G .GE. ZERO"
+ if self.ftol < 0:
+ task = b"ERROR: FTOL .LT. ZERO"
+ if self.gtol < 0:
+ task = b"ERROR: GTOL .LT. ZERO"
+ if self.xtol < 0:
+ task = b"ERROR: XTOL .LT. ZERO"
+ if self.stpmin < 0:
+ task = b"ERROR: STPMIN .LT. ZERO"
+ if self.stpmax < self.stpmin:
+ task = b"ERROR: STPMAX .LT. STPMIN"
+
+ if task[:5] == b"ERROR":
+ return stp, f, g, task
+
+ # Initialize local variables.
+
+ self.brackt = False
+ self.stage = 1
+ self.finit = f
+ self.ginit = g
+ self.gtest = self.ftol * self.ginit
+ self.width = self.stpmax - self.stpmin
+ self.width1 = self.width / p5
+
+ # The variables stx, fx, gx contain the values of the step,
+ # function, and derivative at the best step.
+ # The variables sty, fy, gy contain the values of the step,
+ # function, and derivative at sty.
+ # The variables stp, f, g contain the values of the step,
+ # function, and derivative at stp.
+
+ self.stx = 0.0
+ self.fx = self.finit
+ self.gx = self.ginit
+ self.sty = 0.0
+ self.fy = self.finit
+ self.gy = self.ginit
+ self.stmin = 0
+ self.stmax = stp + xtrapu * stp
+ task = b"FG"
+ return stp, f, g, task
+
+ # In the original Fortran this was a location to restore variables;
+ # we don't need to do that here because they're attributes.
+
+ # If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
+ # algorithm enters the second stage.
+ ftest = self.finit + stp * self.gtest
+
+ if self.stage == 1 and f <= ftest and g >= 0:
+ self.stage = 2
+
+ # test for warnings
+ if self.brackt and (stp <= self.stmin or stp >= self.stmax):
+ task = b"WARNING: ROUNDING ERRORS PREVENT PROGRESS"
+ if self.brackt and self.stmax - self.stmin <= self.xtol * self.stmax:
+ task = b"WARNING: XTOL TEST SATISFIED"
+ if stp == self.stpmax and f <= ftest and g <= self.gtest:
+ task = b"WARNING: STP = STPMAX"
+ if stp == self.stpmin and (f > ftest or g >= self.gtest):
+ task = b"WARNING: STP = STPMIN"
+
+ # test for convergence
+ if f <= ftest and abs(g) <= self.gtol * -self.ginit:
+ task = b"CONVERGENCE"
+
+ # test for termination
+ if task[:4] == b"WARN" or task[:4] == b"CONV":
+ return stp, f, g, task
+
+ # A modified function is used to predict the step during the
+ # first stage if a lower function value has been obtained but
+ # the decrease is not sufficient.
+ if self.stage == 1 and f <= self.fx and f > ftest:
+ # Define the modified function and derivative values.
+ fm = f - stp * self.gtest
+ fxm = self.fx - self.stx * self.gtest
+ fym = self.fy - self.sty * self.gtest
+ gm = g - self.gtest
+ gxm = self.gx - self.gtest
+ gym = self.gy - self.gtest
+
+ # Call dcstep to update stx, sty, and to compute the new step.
+ # dcstep can have several operations which can produce NaN
+ # e.g. inf/inf. Filter these out.
+ with np.errstate(invalid="ignore", over="ignore"):
+ tup = dcstep(
+ self.stx,
+ fxm,
+ gxm,
+ self.sty,
+ fym,
+ gym,
+ stp,
+ fm,
+ gm,
+ self.brackt,
+ self.stmin,
+ self.stmax,
+ )
+ self.stx, fxm, gxm, self.sty, fym, gym, stp, self.brackt = tup
+
+ # Reset the function and derivative values for f
+ self.fx = fxm + self.stx * self.gtest
+ self.fy = fym + self.sty * self.gtest
+ self.gx = gxm + self.gtest
+ self.gy = gym + self.gtest
+
+ else:
+ # Call dcstep to update stx, sty, and to compute the new step.
+ # dcstep can have several operations which can produce NaN
+ # e.g. inf/inf. Filter these out.
+
+ with np.errstate(invalid="ignore", over="ignore"):
+ tup = dcstep(
+ self.stx,
+ self.fx,
+ self.gx,
+ self.sty,
+ self.fy,
+ self.gy,
+ stp,
+ f,
+ g,
+ self.brackt,
+ self.stmin,
+ self.stmax,
+ )
+ (
+ self.stx,
+ self.fx,
+ self.gx,
+ self.sty,
+ self.fy,
+ self.gy,
+ stp,
+ self.brackt,
+ ) = tup
+
+ # Decide if a bisection step is needed
+ if self.brackt:
+ if abs(self.sty - self.stx) >= p66 * self.width1:
+ stp = self.stx + p5 * (self.sty - self.stx)
+ self.width1 = self.width
+ self.width = abs(self.sty - self.stx)
+
+ # Set the minimum and maximum steps allowed for stp.
+ if self.brackt:
+ self.stmin = min(self.stx, self.sty)
+ self.stmax = max(self.stx, self.sty)
+ else:
+ self.stmin = stp + xtrapl * (stp - self.stx)
+ self.stmax = stp + xtrapu * (stp - self.stx)
+
+ # Force the step to be within the bounds stpmax and stpmin.
+ stp = np.clip(stp, self.stpmin, self.stpmax)
+
+ # If further progress is not possible, let stp be the best
+ # point obtained during the search.
+ if (
+ self.brackt
+ and (stp <= self.stmin or stp >= self.stmax)
+ or (
+ self.brackt
+ and self.stmax - self.stmin <= self.xtol * self.stmax
+ )
+ ):
+ stp = self.stx
+
+ # Obtain another function and derivative
+ task = b"FG"
+ return stp, f, g, task
+
+
+ def dcstep(stx, fx, dx, sty, fy, dy, stp, fp, dp, brackt, stpmin, stpmax):
+ """
+ Subroutine dcstep
+
+ This subroutine computes a safeguarded step for a search
+ procedure and updates an interval that contains a step that
+ satisfies a sufficient decrease and a curvature condition.
+
+ The parameter stx contains the step with the least function
+ value. If brackt is set to .true. then a minimizer has
+ been bracketed in an interval with endpoints stx and sty.
+ The parameter stp contains the current step.
+ The subroutine assumes that if brackt is set to .true. then
+
+ min(stx,sty) < stp < max(stx,sty),
+
+ and that the derivative at stx is negative in the direction
+ of the step.
+
+ The subroutine statement is
+
+ subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
+ stpmin,stpmax)
+
+ where
+
+ stx is a double precision variable.
+ On entry stx is the best step obtained so far and is an
+ endpoint of the interval that contains the minimizer.
+ On exit stx is the updated best step.
+
+ fx is a double precision variable.
+ On entry fx is the function at stx.
+ On exit fx is the function at stx.
+
+ dx is a double precision variable.
+ On entry dx is the derivative of the function at
+ stx. The derivative must be negative in the direction of
+ the step, that is, dx and stp - stx must have opposite
+ signs.
+ On exit dx is the derivative of the function at stx.
+
+ sty is a double precision variable.
+ On entry sty is the second endpoint of the interval that
+ contains the minimizer.
+ On exit sty is the updated endpoint of the interval that
+ contains the minimizer.
+
+ fy is a double precision variable.
+ On entry fy is the function at sty.
+ On exit fy is the function at sty.
+
+ dy is a double precision variable.
+ On entry dy is the derivative of the function at sty.
+ On exit dy is the derivative of the function at the exit sty.
+
+ stp is a double precision variable.
+ On entry stp is the current step. If brackt is set to .true.
+ then on input stp must be between stx and sty.
+ On exit stp is a new trial step.
+
+ fp is a double precision variable.
+ On entry fp is the function at stp.
+ On exit fp is unchanged.
+
+ dp is a double precision variable.
+ On entry dp is the derivative of the function at stp.
+ On exit dp is unchanged.
+
+ brackt is a logical variable.
+ On entry brackt specifies if a minimizer has been bracketed.
+ Initially brackt must be set to .false.
+ On exit brackt specifies if a minimizer has been bracketed.
+ When a minimizer is bracketed brackt is set to .true.
+
+ stpmin is a double precision variable.
+ On entry stpmin is a lower bound for the step.
+ On exit stpmin is unchanged.
+
+ stpmax is a double precision variable.
+ On entry stpmax is an upper bound for the step.
+ On exit stpmax is unchanged.
+
+ MINPACK-1 Project. June 1983
+ Argonne National Laboratory.
+ Jorge J. More' and David J. Thuente.
+
+ MINPACK-2 Project. November 1993.
+ Argonne National Laboratory and University of Minnesota.
+ Brett M. Averick and Jorge J. More'.
+
+ """
+ sgn_dp = np.sign(dp)
+ sgn_dx = np.sign(dx)
+
+ # sgnd = dp * (dx / abs(dx))
+ sgnd = sgn_dp * sgn_dx
+
+ # First case: A higher function value. The minimum is bracketed.
+ # If the cubic step is closer to stx than the quadratic step, the
+ # cubic step is taken, otherwise the average of the cubic and
+ # quadratic steps is taken.
+ if fp > fx:
+ theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
+ if stp < stx:
+ gamma *= -1
+ p = (gamma - dx) + theta
+ q = ((gamma - dx) + gamma) + dp
+ r = p / q
+ stpc = stx + r * (stp - stx)
+ stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.0) * (stp - stx)
+ if abs(stpc - stx) <= abs(stpq - stx):
+ stpf = stpc
+ else:
+ stpf = stpc + (stpq - stpc) / 2.0
+ brackt = True
+ elif sgnd < 0.0:
+ # Second case: A lower function value and derivatives of opposite
+ # sign. The minimum is bracketed. If the cubic step is farther from
+ # stp than the secant step, the cubic step is taken, otherwise the
+ # secant step is taken.
+ theta = 3 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
+ if stp > stx:
+ gamma *= -1
+ p = (gamma - dp) + theta
+ q = ((gamma - dp) + gamma) + dx
+ r = p / q
+ stpc = stp + r * (stx - stp)
+ stpq = stp + (dp / (dp - dx)) * (stx - stp)
+ if abs(stpc - stp) > abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+ brackt = True
+ elif abs(dp) < abs(dx):
+ # Third case: A lower function value, derivatives of the same sign,
+ # and the magnitude of the derivative decreases.
+
+ # The cubic step is computed only if the cubic tends to infinity
+ # in the direction of the step or if the minimum of the cubic
+ # is beyond stp. Otherwise the cubic step is defined to be the
+ # secant step.
+ theta = 3 * (fx - fp) / (stp - stx) + dx + dp
+ s = max(abs(theta), abs(dx), abs(dp))
+
+ # The case gamma = 0 only arises if the cubic does not tend
+ # to infinity in the direction of the step.
+ gamma = s * np.sqrt(max(0, (theta / s) ** 2 - (dx / s) * (dp / s)))
+ if stp > stx:
+ gamma = -gamma
+ p = (gamma - dp) + theta
+ q = (gamma + (dx - dp)) + gamma
+ r = p / q
+ if r < 0 and gamma != 0:
+ stpc = stp + r * (stx - stp)
+ elif stp > stx:
+ stpc = stpmax
+ else:
+ stpc = stpmin
+ stpq = stp + (dp / (dp - dx)) * (stx - stp)
+
+ if brackt:
+ # A minimizer has been bracketed. If the cubic step is
+ # closer to stp than the secant step, the cubic step is
+ # taken, otherwise the secant step is taken.
+ if abs(stpc - stp) < abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+
+ if stp > stx:
+ stpf = min(stp + 0.66 * (sty - stp), stpf)
+ else:
+ stpf = max(stp + 0.66 * (sty - stp), stpf)
+ else:
+ # A minimizer has not been bracketed. If the cubic step is
+ # farther from stp than the secant step, the cubic step is
+ # taken, otherwise the secant step is taken.
+ if abs(stpc - stp) > abs(stpq - stp):
+ stpf = stpc
+ else:
+ stpf = stpq
+ stpf = np.clip(stpf, stpmin, stpmax)
+
+ else:
+ # Fourth case: A lower function value, derivatives of the same sign,
+ # and the magnitude of the derivative does not decrease. If the
+ # minimum is not bracketed, the step is either stpmin or stpmax,
+ # otherwise the cubic step is taken.
+ if brackt:
+ theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp
+ s = max(abs(theta), abs(dy), abs(dp))
+ gamma = s * np.sqrt((theta / s) ** 2 - (dy / s) * (dp / s))
+ if stp > sty:
+ gamma = -gamma
+ p = (gamma - dp) + theta
+ q = ((gamma - dp) + gamma) + dy
+ r = p / q
+ stpc = stp + r * (sty - stp)
+ stpf = stpc
+ elif stp > stx:
+ stpf = stpmax
+ else:
+ stpf = stpmin
+
+ # Update the interval which contains a minimizer.
+ if fp > fx:
+ sty = stp
+ fy = fp
+ dy = dp
+ else:
+ if sgnd < 0:
+ sty = stx
+ fy = fx
+ dy = dx
+ stx = stp
+ fx = fp
+ dx = dp
+
+ # Compute the new step.
+ stp = stpf
+
+ return stx, fx, dx, sty, fy, dy, stp, brackt
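The `task`-based contract documented above is a reverse-communication interface: the line search never calls the objective itself; the caller evaluates it whenever `task` starts with b"FG". A minimal driver loop is sketched below. The names `search`, `phi`, and `dphi` are assumptions for illustration: `search` stands for any callable with the documented `(stp, f, g, task)` contract, and `phi`/`dphi` are the caller's function value and derivative along the search direction.

# Sketch of driving a dcsrch-style search via its task protocol
# (hypothetical driver; `search`, `phi`, `dphi` are not part of this diff).
def drive_line_search(search, phi, dphi, stp0, maxiter=100):
    # On the first call, f and g must be the function value and derivative
    # at step 0, and stp is the initial trial step (see docstring above).
    stp, task = stp0, b"START"
    f, g = phi(0.0), dphi(0.0)
    for _ in range(maxiter):
        stp, f, g, task = search(stp, f, g, task)
        if task[:2] == b"FG":
            # The search requests the objective at the new trial step.
            f, g = phi(stp), dphi(stp)
        else:
            break
    # task now begins with b"CONV", b"WARN", or b"ERROR".
    return stp, f, g, task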
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py ADDED
@@ -0,0 +1,693 @@
+ import numpy as np
+ import scipy.sparse as sps
+ from ._numdiff import approx_derivative, group_columns
+ from ._hessian_update_strategy import HessianUpdateStrategy
+ from scipy.sparse.linalg import LinearOperator
+ from scipy._lib._array_api import atleast_nd, array_namespace
+
+
+ FD_METHODS = ('2-point', '3-point', 'cs')
+
+
+ def _wrapper_fun(fun, args=()):
+ ncalls = [0]
+
+ def wrapped(x):
+ ncalls[0] += 1
+ # Send a copy because the user may overwrite it.
+ # Overwriting results in undefined behaviour because
+ # fun(self.x) will change self.x, with the two no longer linked.
+ fx = fun(np.copy(x), *args)
+ # Make sure the function returns a true scalar
+ if not np.isscalar(fx):
+ try:
+ fx = np.asarray(fx).item()
+ except (TypeError, ValueError) as e:
+ raise ValueError(
+ "The user-provided objective function "
+ "must return a scalar value."
+ ) from e
+ return fx
+ return wrapped, ncalls
+
+
+ def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None):
+ ncalls = [0]
+
+ if callable(grad):
+ def wrapped(x, **kwds):
+ # kwds present to give function same signature as numdiff variant
+ ncalls[0] += 1
+ return np.atleast_1d(grad(np.copy(x), *args))
+ return wrapped, ncalls
+
+ elif grad in FD_METHODS:
+ def wrapped1(x, f0=None):
+ ncalls[0] += 1
+ return approx_derivative(
+ fun, x, f0=f0, **finite_diff_options
+ )
+
+ return wrapped1, ncalls
+
+
+ def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None):
+ if callable(hess):
+ H = hess(np.copy(x0), *args)
+ ncalls = [1]
+
+ if sps.issparse(H):
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return sps.csr_matrix(hess(np.copy(x), *args))
+
+ H = sps.csr_matrix(H)
+
+ elif isinstance(H, LinearOperator):
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return hess(np.copy(x), *args)
+
+ else: # dense
+ def wrapped(x, **kwds):
+ ncalls[0] += 1
+ return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
+
+ H = np.atleast_2d(np.asarray(H))
+
+ return wrapped, ncalls, H
+ elif hess in FD_METHODS:
+ ncalls = [0]
+
+ def wrapped1(x, f0=None):
+ return approx_derivative(
+ grad, x, f0=f0, **finite_diff_options
+ )
+
+ return wrapped1, ncalls, None
+
+
+ class ScalarFunction:
+ """Scalar function and its derivatives.
+
+ This class defines a scalar function F: R^n->R and methods for
+ computing or approximating its first and second derivatives.
+
+ Parameters
+ ----------
+ fun : callable
+ evaluates the scalar function. Must be of the form ``fun(x, *args)``,
+ where ``x`` is the argument in the form of a 1-D array and ``args`` is
+ a tuple of any additional fixed parameters needed to completely specify
+ the function. Should return a scalar.
+ x0 : array-like
+ Provides an initial set of variables for evaluating fun. Array of real
+ elements of size (n,), where 'n' is the number of independent
+ variables.
+ args : tuple, optional
+ Any additional fixed parameters needed to completely specify the scalar
+ function.
+ grad : {callable, '2-point', '3-point', 'cs'}
+ Method for computing the gradient vector.
+ If it is a callable, it should be a function that returns the gradient
+ vector:
+
+ ``grad(x, *args) -> array_like, shape (n,)``
+
+ where ``x`` is an array with shape (n,) and ``args`` is a tuple with
+ the fixed parameters.
+ Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
+ to select a finite difference scheme for numerical estimation of the
+ gradient with a relative step size. These finite difference schemes
+ obey any specified `bounds`.
+ hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
+ Method for computing the Hessian matrix. If it is callable, it should
+ return the Hessian matrix:
+
+ ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
+
+ where x is a (n,) ndarray and `args` is a tuple with the fixed
+ parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
+ select a finite difference scheme for numerical estimation. Or, objects
+ implementing the `HessianUpdateStrategy` interface can be used to
+ approximate the Hessian.
+ Whenever the gradient is estimated via finite-differences, the Hessian
+ cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
+ to be estimated using one of the quasi-Newton strategies.
+ finite_diff_rel_step : None or array_like
+ Relative step size to use. The absolute step size is computed as
+ ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
+ adjusted to fit into the bounds. For ``method='3-point'`` the sign
+ of `h` is ignored. If None then finite_diff_rel_step is selected
+ automatically.
+ finite_diff_bounds : tuple of array_like
+ Lower and upper bounds on independent variables. Defaults to no bounds,
+ (-np.inf, np.inf). Each bound must match the size of `x0` or be a
+ scalar, in the latter case the bound will be the same for all
+ variables. Use it to limit the range of function evaluation.
+ epsilon : None or array_like, optional
+ Absolute step size to use, possibly adjusted to fit into the bounds.
+ For ``method='3-point'`` the sign of `epsilon` is ignored. By default
+ relative steps are used, only if ``epsilon is not None`` are absolute
+ steps used.
+
+ Notes
+ -----
+ This class implements a memoization logic. There are methods `fun`,
+ `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The following
+ things should be considered:
+
+ 1. Use only public methods `fun`, `grad` and `hess`.
+ 2. After one of the methods is called, the corresponding attribute
+ will be set. However, a subsequent call with a different argument
+ of *any* of the methods may overwrite the attribute.
+ """
+ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
+ finite_diff_bounds, epsilon=None):
+ if not callable(grad) and grad not in FD_METHODS:
+ raise ValueError(
+ f"`grad` must be either callable or one of {FD_METHODS}."
+ )
+
+ if not (callable(hess) or hess in FD_METHODS
+ or isinstance(hess, HessianUpdateStrategy)):
+ raise ValueError(
+ f"`hess` must be either callable, HessianUpdateStrategy"
+ f" or one of {FD_METHODS}."
+ )
+
+ if grad in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the gradient is estimated via "
+ "finite-differences, we require the Hessian "
+ "to be estimated using one of the "
+ "quasi-Newton strategies.")
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # original arguments
+ self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args)
+ self._orig_fun = fun
+ self._orig_grad = grad
+ self._orig_hess = hess
+ self._args = args
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+ self.n = self.x.size
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+
+ self._lowest_x = None
+ self._lowest_f = np.inf
+
+ finite_diff_options = {}
+ if grad in FD_METHODS:
+ finite_diff_options["method"] = grad
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["abs_step"] = epsilon
+ finite_diff_options["bounds"] = finite_diff_bounds
+ if hess in FD_METHODS:
+ finite_diff_options["method"] = hess
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["abs_step"] = epsilon
+ finite_diff_options["as_linear_operator"] = True
+
+ # Initial function evaluation
+ self._update_fun()
+
+ # Initial gradient evaluation
+ self._wrapped_grad, self._ngev = _wrapper_grad(
+ grad,
+ fun=self._wrapped_fun,
+ args=args,
+ finite_diff_options=finite_diff_options
+ )
+ self._update_grad()
+
+ # Hessian evaluation
+ if callable(hess):
+ self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
+ hess, x0=x0, args=args
+ )
+ self.H_updated = True
+ elif hess in FD_METHODS:
+ self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
+ hess,
+ grad=self._wrapped_grad,
+ x0=x0,
+ finite_diff_options=finite_diff_options
+ )
+ self._update_grad()
+ self.H = self._wrapped_hess(self.x, f0=self.g)
+ self.H_updated = True
+ elif isinstance(hess, HessianUpdateStrategy):
+ self.H = hess
+ self.H.initialize(self.n, 'hess')
+ self.H_updated = True
+ self.x_prev = None
+ self.g_prev = None
+ self._nhev = [0]
+
+ @property
+ def nfev(self):
+ return self._nfev[0]
+
+ @property
+ def ngev(self):
+ return self._ngev[0]
+
+ @property
+ def nhev(self):
+ return self._nhev[0]
+
+ def _update_x(self, x):
+ if isinstance(self._orig_hess, HessianUpdateStrategy):
+ self._update_grad()
+ self.x_prev = self.x
+ self.g_prev = self.g
+ # ensure that self.x is a copy of x. Don't store a reference
+ # otherwise the memoization doesn't work properly.
+
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+ self._update_hess()
+ else:
+ # ensure that self.x is a copy of x. Don't store a reference
+ # otherwise the memoization doesn't work properly.
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.g_updated = False
+ self.H_updated = False
+
+ def _update_fun(self):
+ if not self.f_updated:
+ fx = self._wrapped_fun(self.x)
+ if fx < self._lowest_f:
+ self._lowest_x = self.x
+ self._lowest_f = fx
+
+ self.f = fx
+ self.f_updated = True
+
+ def _update_grad(self):
+ if not self.g_updated:
+ if self._orig_grad in FD_METHODS:
+ self._update_fun()
+ self.g = self._wrapped_grad(self.x, f0=self.f)
+ self.g_updated = True
+
+ def _update_hess(self):
+ if not self.H_updated:
+ if self._orig_hess in FD_METHODS:
+ self._update_grad()
+ self.H = self._wrapped_hess(self.x, f0=self.g)
+ elif isinstance(self._orig_hess, HessianUpdateStrategy):
+ self._update_grad()
+ self.H.update(self.x - self.x_prev, self.g - self.g_prev)
+ else: # should be callable(hess)
+ self.H = self._wrapped_hess(self.x)
+
+ self.H_updated = True
+
+ def fun(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_fun()
+ return self.f
+
+ def grad(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_grad()
+ return self.g
+
+ def hess(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_hess()
+ return self.H
+
+ def fun_and_grad(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x(x)
+ self._update_fun()
+ self._update_grad()
+ return self.f, self.g
+
+
+ class VectorFunction:
+ """Vector function and its derivatives.
+
+ This class defines a vector function F: R^n->R^m and methods for
+ computing or approximating its first and second derivatives.
+
+ Notes
+ -----
+ This class implements a memoization logic. There are methods `fun`,
+ `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The following
+ things should be considered:
+
+ 1. Use only public methods `fun`, `jac` and `hess`.
+ 2. After one of the methods is called, the corresponding attribute
+ will be set. However, a subsequent call with a different argument
+ of *any* of the methods may overwrite the attribute.
+ """
+ def __init__(self, fun, x0, jac, hess,
+ finite_diff_rel_step, finite_diff_jac_sparsity,
+ finite_diff_bounds, sparse_jacobian):
+ if not callable(jac) and jac not in FD_METHODS:
+ raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.")
+
+ if not (callable(hess) or hess in FD_METHODS
+ or isinstance(hess, HessianUpdateStrategy)):
+ raise ValueError("`hess` must be either callable, "
+ f"HessianUpdateStrategy or one of {FD_METHODS}.")
+
+ if jac in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the Jacobian is estimated via "
+ "finite-differences, we require the Hessian to "
+ "be estimated using one of the quasi-Newton "
+ "strategies.")
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+
+ self.n = self.x.size
+ self.nfev = 0
+ self.njev = 0
+ self.nhev = 0
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+
+ finite_diff_options = {}
+ if jac in FD_METHODS:
+ finite_diff_options["method"] = jac
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ if finite_diff_jac_sparsity is not None:
+ sparsity_groups = group_columns(finite_diff_jac_sparsity)
+ finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
+ sparsity_groups)
+ finite_diff_options["bounds"] = finite_diff_bounds
+ self.x_diff = np.copy(self.x)
+ if hess in FD_METHODS:
+ finite_diff_options["method"] = hess
+ finite_diff_options["rel_step"] = finite_diff_rel_step
+ finite_diff_options["as_linear_operator"] = True
+ self.x_diff = np.copy(self.x)
+ if jac in FD_METHODS and hess in FD_METHODS:
+ raise ValueError("Whenever the Jacobian is estimated via "
+ "finite-differences, we require the Hessian to "
+ "be estimated using one of the quasi-Newton "
+ "strategies.")
+
+ # Function evaluation
+ def fun_wrapped(x):
+ self.nfev += 1
+ return np.atleast_1d(fun(x))
+
+ def update_fun():
+ self.f = fun_wrapped(self.x)
+
+ self._update_fun_impl = update_fun
+ update_fun()
+
+ self.v = np.zeros_like(self.f)
+ self.m = self.v.size
+
+ # Jacobian Evaluation
+ if callable(jac):
+ self.J = jac(self.x)
+ self.J_updated = True
+ self.njev += 1
+
+ if (sparse_jacobian or
+ sparse_jacobian is None and sps.issparse(self.J)):
+ def jac_wrapped(x):
+ self.njev += 1
+ return sps.csr_matrix(jac(x))
+ self.J = sps.csr_matrix(self.J)
+ self.sparse_jacobian = True
+
+ elif sps.issparse(self.J):
+ def jac_wrapped(x):
+ self.njev += 1
+ return jac(x).toarray()
+ self.J = self.J.toarray()
+ self.sparse_jacobian = False
+
+ else:
+ def jac_wrapped(x):
+ self.njev += 1
+ return np.atleast_2d(jac(x))
+ self.J = np.atleast_2d(self.J)
+ self.sparse_jacobian = False
+
+ def update_jac():
+ self.J = jac_wrapped(self.x)
+
+ elif jac in FD_METHODS:
+ self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options)
+ self.J_updated = True
+
+ if (sparse_jacobian or
+ sparse_jacobian is None and sps.issparse(self.J)):
+ def update_jac():
+ self._update_fun()
+ self.J = sps.csr_matrix(
+ approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options))
+ self.J = sps.csr_matrix(self.J)
+ self.sparse_jacobian = True
+
+ elif sps.issparse(self.J):
+ def update_jac():
+ self._update_fun()
+ self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options).toarray()
+ self.J = self.J.toarray()
+ self.sparse_jacobian = False
+
+ else:
+ def update_jac():
+ self._update_fun()
+ self.J = np.atleast_2d(
+ approx_derivative(fun_wrapped, self.x, f0=self.f,
+ **finite_diff_options))
+ self.J = np.atleast_2d(self.J)
+ self.sparse_jacobian = False
+
+ self._update_jac_impl = update_jac
+
+ # Define Hessian
+ if callable(hess):
+ self.H = hess(self.x, self.v)
+ self.H_updated = True
+ self.nhev += 1
+
+ if sps.issparse(self.H):
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return sps.csr_matrix(hess(x, v))
+ self.H = sps.csr_matrix(self.H)
+
+ elif isinstance(self.H, LinearOperator):
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return hess(x, v)
+
+ else:
+ def hess_wrapped(x, v):
+ self.nhev += 1
+ return np.atleast_2d(np.asarray(hess(x, v)))
+ self.H = np.atleast_2d(np.asarray(self.H))
+
+ def update_hess():
+ self.H = hess_wrapped(self.x, self.v)
+ elif hess in FD_METHODS:
+ def jac_dot_v(x, v):
+ return jac_wrapped(x).T.dot(v)
+
+ def update_hess():
+ self._update_jac()
+ self.H = approx_derivative(jac_dot_v, self.x,
+ f0=self.J.T.dot(self.v),
+ args=(self.v,),
+ **finite_diff_options)
+ update_hess()
+ self.H_updated = True
+ elif isinstance(hess, HessianUpdateStrategy):
+ self.H = hess
+ self.H.initialize(self.n, 'hess')
+ self.H_updated = True
+ self.x_prev = None
+ self.J_prev = None
+
+ def update_hess():
+ self._update_jac()
+ # When v is updated before x was updated, then x_prev and
+ # J_prev are None and we need this check.
+ if self.x_prev is not None and self.J_prev is not None:
+ delta_x = self.x - self.x_prev
+ delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
+ self.H.update(delta_x, delta_g)
+
+ self._update_hess_impl = update_hess
+
+ if isinstance(hess, HessianUpdateStrategy):
+ def update_x(x):
+ self._update_jac()
+ self.x_prev = self.x
+ self.J_prev = self.J
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+ self._update_hess()
+ else:
+ def update_x(x):
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+ self.J_updated = False
+ self.H_updated = False
+
+ self._update_x_impl = update_x
+
+ def _update_v(self, v):
+ if not np.array_equal(v, self.v):
+ self.v = v
+ self.H_updated = False
+
+ def _update_x(self, x):
+ if not np.array_equal(x, self.x):
+ self._update_x_impl(x)
+
+ def _update_fun(self):
+ if not self.f_updated:
+ self._update_fun_impl()
+ self.f_updated = True
+
+ def _update_jac(self):
+ if not self.J_updated:
+ self._update_jac_impl()
+ self.J_updated = True
+
+ def _update_hess(self):
+ if not self.H_updated:
+ self._update_hess_impl()
+ self.H_updated = True
+
+ def fun(self, x):
+ self._update_x(x)
+ self._update_fun()
+ return self.f
+
+ def jac(self, x):
+ self._update_x(x)
+ self._update_jac()
+ return self.J
+
+ def hess(self, x, v):
+ # v should be updated before x.
+ self._update_v(v)
+ self._update_x(x)
+ self._update_hess()
+ return self.H
+
+
+ class LinearVectorFunction:
+ """Linear vector function and its derivatives.
+
+ Defines a linear function F = A x, where x is an N-D vector and
+ A is an m-by-n matrix. The Jacobian is constant and equal to A. The Hessian
+ is identically zero and it is returned as a csr matrix.
+ """
+ def __init__(self, A, x0, sparse_jacobian):
+ if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
+ self.J = sps.csr_matrix(A)
+ self.sparse_jacobian = True
+ elif sps.issparse(A):
+ self.J = A.toarray()
+ self.sparse_jacobian = False
+ else:
+ # np.asarray makes sure A is ndarray and not matrix
+ self.J = np.atleast_2d(np.asarray(A))
+ self.sparse_jacobian = False
+
+ self.m, self.n = self.J.shape
+
+ self.xp = xp = array_namespace(x0)
+ _x = atleast_nd(x0, ndim=1, xp=xp)
+ _dtype = xp.float64
+ if xp.isdtype(_x.dtype, "real floating"):
+ _dtype = _x.dtype
+
+ # promotes to floating
+ self.x = xp.astype(_x, _dtype)
+ self.x_dtype = _dtype
+
+ self.f = self.J.dot(self.x)
+ self.f_updated = True
+
+ self.v = np.zeros(self.m, dtype=float)
+ self.H = sps.csr_matrix((self.n, self.n))
+
+ def _update_x(self, x):
+ if not np.array_equal(x, self.x):
+ _x = atleast_nd(x, ndim=1, xp=self.xp)
+ self.x = self.xp.astype(_x, self.x_dtype)
+ self.f_updated = False
+
+ def fun(self, x):
+ self._update_x(x)
+ if not self.f_updated:
+ self.f = self.J.dot(x)
+ self.f_updated = True
+ return self.f
+
+ def jac(self, x):
+ self._update_x(x)
+ return self.J
+
+ def hess(self, x, v):
+ self._update_x(x)
+ self.v = v
+ return self.H
+
+
+ class IdentityVectorFunction(LinearVectorFunction):
+ """Identity vector function and its derivatives.
+
+ The Jacobian is the identity matrix, returned as a dense array when
+ `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
+ identically zero and it is returned as a csr matrix.
+ """
+ def __init__(self, x0, sparse_jacobian):
+ n = len(x0)
+ if sparse_jacobian or sparse_jacobian is None:
+ A = sps.eye(n, format='csr')
+ sparse_jacobian = True
+ else:
+ A = np.eye(n)
+ sparse_jacobian = False
+ super().__init__(A, x0, sparse_jacobian)
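The memoization contract described in the `ScalarFunction` notes can be seen directly by counting evaluations. Below is a minimal sketch; these classes are private helpers, so the import path and constructor arguments simply mirror the `__init__` signature in the file above, and the specific numbers are illustrative.

import numpy as np
from scipy.optimize import BFGS
from scipy.optimize._differentiable_functions import ScalarFunction

def f(x):
    return float(np.sum(x**2))

# A finite-difference gradient must be paired with a quasi-Newton
# Hessian, as enforced by the ValueError in __init__ above.
sf = ScalarFunction(f, x0=np.array([1.0, 2.0]), args=(),
                    grad='2-point', hess=BFGS(),
                    finite_diff_rel_step=None,
                    finite_diff_bounds=(-np.inf, np.inf))

x = np.array([3.0, 4.0])
sf.fun(x)    # evaluates f at x and caches the result
sf.fun(x)    # same x: served from the cache, nfev unchanged
sf.grad(x)   # the 2-point differences reuse the cached f(x) as f0
print(sf.nfev, sf.ngev)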
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentialevolution.py ADDED
@@ -0,0 +1,1951 @@
1
+ """
2
+ differential_evolution: The differential evolution global optimization algorithm
3
+ Added by Andrew Nelson 2014
4
+ """
5
+ import warnings
6
+
7
+ import numpy as np
8
+ from scipy.optimize import OptimizeResult, minimize
9
+ from scipy.optimize._optimize import _status_message, _wrap_callback
10
+ from scipy._lib._util import (check_random_state, MapWrapper, _FunctionWrapper,
11
+ rng_integers)
12
+
13
+ from scipy.optimize._constraints import (Bounds, new_bounds_to_old,
14
+ NonlinearConstraint, LinearConstraint)
15
+ from scipy.sparse import issparse
16
+
17
+ __all__ = ['differential_evolution']
18
+
19
+
20
+ _MACHEPS = np.finfo(np.float64).eps
21
+
22
+
23
+ def differential_evolution(func, bounds, args=(), strategy='best1bin',
24
+ maxiter=1000, popsize=15, tol=0.01,
25
+ mutation=(0.5, 1), recombination=0.7, seed=None,
26
+ callback=None, disp=False, polish=True,
27
+ init='latinhypercube', atol=0, updating='immediate',
28
+ workers=1, constraints=(), x0=None, *,
29
+ integrality=None, vectorized=False):
30
+ """Finds the global minimum of a multivariate function.
31
+
32
+ The differential evolution method [1]_ is stochastic in nature. It does
33
+ not use gradient methods to find the minimum, and can search large areas
34
+ of candidate space, but often requires larger numbers of function
35
+ evaluations than conventional gradient-based techniques.
36
+
37
+ The algorithm is due to Storn and Price [2]_.
38
+
39
+ Parameters
40
+ ----------
41
+ func : callable
42
+ The objective function to be minimized. Must be in the form
43
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
44
+ and ``args`` is a tuple of any additional fixed parameters needed to
45
+ completely specify the function. The number of parameters, N, is equal
46
+ to ``len(x)``.
47
+ bounds : sequence or `Bounds`
48
+ Bounds for variables. There are two ways to specify the bounds:
49
+
50
+ 1. Instance of `Bounds` class.
51
+ 2. ``(min, max)`` pairs for each element in ``x``, defining the
52
+ finite lower and upper bounds for the optimizing argument of
53
+ `func`.
54
+
55
+ The total number of bounds is used to determine the number of
56
+ parameters, N. If there are parameters whose bounds are equal the total
57
+ number of free parameters is ``N - N_equal``.
58
+
59
+ args : tuple, optional
60
+ Any additional fixed parameters needed to
61
+ completely specify the objective function.
62
+ strategy : {str, callable}, optional
63
+ The differential evolution strategy to use. Should be one of:
64
+
65
+ - 'best1bin'
66
+ - 'best1exp'
67
+ - 'rand1bin'
68
+ - 'rand1exp'
69
+ - 'rand2bin'
70
+ - 'rand2exp'
71
+ - 'randtobest1bin'
72
+ - 'randtobest1exp'
73
+ - 'currenttobest1bin'
74
+ - 'currenttobest1exp'
75
+ - 'best2exp'
76
+ - 'best2bin'
77
+
78
+ The default is 'best1bin'. Strategies that may be implemented are
79
+ outlined in 'Notes'.
80
+ Alternatively the differential evolution strategy can be customized by
81
+ providing a callable that constructs a trial vector. The callable must
82
+ have the form ``strategy(candidate: int, population: np.ndarray, rng=None)``,
83
+ where ``candidate`` is an integer specifying which entry of the
84
+ population is being evolved, ``population`` is an array of shape
85
+ ``(S, N)`` containing all the population members (where S is the
86
+ total population size), and ``rng`` is the random number generator
87
+ being used within the solver.
88
+ ``candidate`` will be in the range ``[0, S)``.
89
+ ``strategy`` must return a trial vector with shape `(N,)`. The
90
+ fitness of this trial vector is compared against the fitness of
91
+ ``population[candidate]``.
92
+
93
+ .. versionchanged:: 1.12.0
94
+ Customization of evolution strategy via a callable.
95
+
96
+ maxiter : int, optional
97
+ The maximum number of generations over which the entire population is
98
+ evolved. The maximum number of function evaluations (with no polishing)
99
+ is: ``(maxiter + 1) * popsize * (N - N_equal)``
100
+ popsize : int, optional
101
+ A multiplier for setting the total population size. The population has
102
+ ``popsize * (N - N_equal)`` individuals. This keyword is overridden if
103
+ an initial population is supplied via the `init` keyword. When using
104
+ ``init='sobol'`` the population size is calculated as the next power
105
+ of 2 after ``popsize * (N - N_equal)``.
106
+ tol : float, optional
107
+ Relative tolerance for convergence, the solving stops when
108
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
109
+ where and `atol` and `tol` are the absolute and relative tolerance
110
+ respectively.
111
+ mutation : float or tuple(float, float), optional
112
+ The mutation constant. In the literature this is also known as
113
+ differential weight, being denoted by F.
114
+ If specified as a float it should be in the range [0, 2].
115
+ If specified as a tuple ``(min, max)`` dithering is employed. Dithering
116
+ randomly changes the mutation constant on a generation by generation
117
+ basis. The mutation constant for that generation is taken from
118
+ ``U[min, max)``. Dithering can help speed convergence significantly.
119
+ Increasing the mutation constant increases the search radius, but will
120
+ slow down convergence.
121
+ recombination : float, optional
122
+ The recombination constant, should be in the range [0, 1]. In the
123
+ literature this is also known as the crossover probability, being
124
+ denoted by CR. Increasing this value allows a larger number of mutants
125
+ to progress into the next generation, but at the risk of population
126
+ stability.
127
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
128
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
129
+ singleton is used.
130
+ If `seed` is an int, a new ``RandomState`` instance is used,
131
+ seeded with `seed`.
132
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
133
+ that instance is used.
134
+ Specify `seed` for repeatable minimizations.
135
+ disp : bool, optional
136
+ Prints the evaluated `func` at every iteration.
137
+ callback : callable, optional
138
+ A callable called after each iteration. Has the signature:
139
+
140
+ ``callback(intermediate_result: OptimizeResult)``
141
+
142
+ where ``intermediate_result`` is a keyword parameter containing an
143
+ `OptimizeResult` with attributes ``x`` and ``fun``, the best solution
144
+ found so far and the objective function. Note that the name
145
+ of the parameter must be ``intermediate_result`` for the callback
146
+ to be passed an `OptimizeResult`.
147
+
148
+ The callback also supports a signature like:
149
+
150
+ ``callback(x, convergence: float=val)``
151
+
152
+ ``val`` represents the fractional value of the population convergence.
153
+ When ``val`` is greater than ``1.0``, the function halts.
154
+
155
+ Introspection is used to determine which of the signatures is invoked.
156
+
157
+ Global minimization will halt if the callback raises ``StopIteration``
158
+ or returns ``True``; any polishing is still carried out.
159
+
160
+ .. versionchanged:: 1.12.0
161
+ callback accepts the ``intermediate_result`` keyword.
162
+
163
+ polish : bool, optional
164
+ If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
165
+ method is used to polish the best population member at the end, which
166
+ can improve the minimization slightly. If a constrained problem is
167
+ being studied then the `trust-constr` method is used instead. For large
168
+ problems with many constraints, polishing can take a long time due to
169
+ the Jacobian computations.
170
+ init : str or array-like, optional
171
+ Specify which type of population initialization is performed. Should be
172
+ one of:
173
+
174
+ - 'latinhypercube'
175
+ - 'sobol'
176
+ - 'halton'
177
+ - 'random'
178
+ - array specifying the initial population. The array should have
179
+ shape ``(S, N)``, where S is the total population size and N is
180
+ the number of parameters.
181
+ `init` is clipped to `bounds` before use.
182
+
183
+ The default is 'latinhypercube'. Latin Hypercube sampling tries to
184
+ maximize coverage of the available parameter space.
185
+
186
+ 'sobol' and 'halton' are superior alternatives and maximize even more
187
+ the parameter space. 'sobol' will enforce an initial population
188
+ size which is calculated as the next power of 2 after
189
+ ``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
190
+ less efficient. See `scipy.stats.qmc` for more details.
191
+
192
+ 'random' initializes the population randomly - this has the drawback
193
+ that clustering can occur, preventing the whole of parameter space
194
+ being covered. Use of an array to specify a population could be used,
195
+ for example, to create a tight bunch of initial guesses in an location
196
+ where the solution is known to exist, thereby reducing time for
197
+ convergence.
198
+ atol : float, optional
199
+ Absolute tolerance for convergence, the solving stops when
200
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
201
+ where and `atol` and `tol` are the absolute and relative tolerance
202
+ respectively.
203
+ updating : {'immediate', 'deferred'}, optional
204
+ If ``'immediate'``, the best solution vector is continuously updated
205
+ within a single generation [4]_. This can lead to faster convergence as
206
+ trial vectors can take advantage of continuous improvements in the best
207
+ solution.
208
+ With ``'deferred'``, the best solution vector is updated once per
209
+ generation. Only ``'deferred'`` is compatible with parallelization or
210
+ vectorization, and the `workers` and `vectorized` keywords can
211
+ over-ride this option.
212
+
213
+ .. versionadded:: 1.2.0
214
+
215
+ workers : int or map-like callable, optional
216
+ If `workers` is an int the population is subdivided into `workers`
217
+ sections and evaluated in parallel
218
+ (uses `multiprocessing.Pool <multiprocessing>`).
219
+ Supply -1 to use all available CPU cores.
220
+ Alternatively supply a map-like callable, such as
221
+ `multiprocessing.Pool.map` for evaluating the population in parallel.
222
+ This evaluation is carried out as ``workers(func, iterable)``.
223
+ This option will override the `updating` keyword to
224
+ ``updating='deferred'`` if ``workers != 1``.
225
+ This option overrides the `vectorized` keyword if ``workers != 1``.
226
+ Requires that `func` be pickleable.
227
+
228
+ .. versionadded:: 1.2.0
229
+
230
+ constraints : {NonLinearConstraint, LinearConstraint, Bounds}
231
+ Constraints on the solver, over and above those applied by the `bounds`
232
+ kwd. Uses the approach by Lampinen [5]_.
233
+
234
+ .. versionadded:: 1.4.0
235
+
236
+ x0 : None or array-like, optional
237
+ Provides an initial guess to the minimization. Once the population has
238
+ been initialized this vector replaces the first (best) member. This
239
+ replacement is done even if `init` is given an initial population.
240
+ ``x0.shape == (N,)``.
241
+
242
+ .. versionadded:: 1.7.0
243
+
244
+ integrality : 1-D array, optional
245
+ For each decision variable, a boolean value indicating whether the
246
+ decision variable is constrained to integer values. The array is
247
+ broadcast to ``(N,)``.
248
+ If any decision variables are constrained to be integral, they will not
249
+ be changed during polishing.
250
+ Only integer values lying between the lower and upper bounds are used.
251
+ If there are no integer values lying between the bounds then a
252
+ `ValueError` is raised.
253
+
254
+ .. versionadded:: 1.9.0
255
+
256
+ vectorized : bool, optional
257
+ If ``vectorized is True``, `func` is sent an `x` array with
258
+ ``x.shape == (N, S)``, and is expected to return an array of shape
259
+ ``(S,)``, where `S` is the number of solution vectors to be calculated.
260
+ If constraints are applied, each of the functions used to construct
261
+ a `Constraint` object should accept an `x` array with
262
+ ``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
263
+ `M` is the number of constraint components.
264
+ This option is an alternative to the parallelization offered by
265
+ `workers`, and may help in optimization speed by reducing interpreter
266
+ overhead from multiple function calls. This keyword is ignored if
267
+ ``workers != 1``.
268
+ This option will override the `updating` keyword to
269
+ ``updating='deferred'``.
270
+ See the notes section for further discussion on when to use
271
+ ``'vectorized'``, and when to use ``'workers'``.
272
+
273
+ .. versionadded:: 1.9.0
274
+
275
+ Returns
276
+ -------
277
+ res : OptimizeResult
278
+ The optimization result represented as a `OptimizeResult` object.
279
+ Important attributes are: ``x`` the solution array, ``success`` a
280
+ Boolean flag indicating if the optimizer exited successfully,
281
+ ``message`` which describes the cause of the termination,
282
+ ``population`` the solution vectors present in the population, and
283
+ ``population_energies`` the value of the objective function for each
284
+ entry in ``population``.
285
+ See `OptimizeResult` for a description of other attributes. If `polish`
286
+ was employed, and a lower minimum was obtained by the polishing, then
287
+ OptimizeResult also contains the ``jac`` attribute.
288
+ If the eventual solution does not satisfy the applied constraints
289
+ ``success`` will be `False`.
290
+
291
+ Notes
292
+ -----
293
+ Differential evolution is a stochastic population based method that is
294
+ useful for global optimization problems. At each pass through the
295
+ population the algorithm mutates each candidate solution by mixing with
296
+ other candidate solutions to create a trial candidate. There are several
297
+ strategies [3]_ for creating trial candidates, which suit some problems
298
+ more than others. The 'best1bin' strategy is a good starting point for
299
+ many systems. In this strategy two members of the population are randomly
300
+ chosen. Their difference is used to mutate the best member (the 'best' in
301
+ 'best1bin'), :math:`x_0`, so far:
302
+
303
+ .. math::
304
+
305
+ b' = x_0 + mutation * (x_{r_0} - x_{r_1})
306
+
307
+ A trial vector is then constructed. Starting with a randomly chosen ith
308
+ parameter the trial is sequentially filled (in modulo) with parameters
309
+ from ``b'`` or the original candidate. The choice of whether to use ``b'``
310
+ or the original candidate is made with a binomial distribution (the 'bin'
311
+ in 'best1bin') - a random number in [0, 1) is generated. If this number is
312
+ less than the `recombination` constant then the parameter is loaded from
313
+ ``b'``, otherwise it is loaded from the original candidate. The final
314
+ parameter is always loaded from ``b'``. Once the trial candidate is built
315
+ its fitness is assessed. If the trial is better than the original candidate
316
+ then it takes its place. If it is also better than the best overall
317
+ candidate it also replaces that.
318
+
319
+ The other strategies available are outlined in Qiang and
320
+ Mitchell (2014) [3]_.
321
+
322
+ .. math::
323
+ rand1* : b' = x_{r_0} + mutation*(x_{r_1} - x_{r_2})
324
+
325
+ rand2* : b' = x_{r_0} + mutation*(x_{r_1} + x_{r_2}
326
+ - x_{r_3} - x_{r_4})
327
+
328
+ best1* : b' = x_0 + mutation*(x_{r_0} - x_{r_1})
329
+
330
+ best2* : b' = x_0 + mutation*(x_{r_0} + x_{r_1}
331
+ - x_{r_2} - x_{r_3})
332
+
333
+ currenttobest1* : b' = x_i + mutation*(x_0 - x_i
334
+ + x_{r_0} - x_{r_1})
335
+
336
+ randtobest1* : b' = x_{r_0} + mutation*(x_0 - x_{r_0}
337
+ + x_{r_1} - x_{r_2})
338
+
339
+ where the integers :math:`r_0, r_1, r_2, r_3, r_4` are chosen randomly
340
+ from the interval [0, NP) with `NP` being the total population size and
341
+ the original candidate having index `i`. The user can fully customize the
342
+ generation of the trial candidates by supplying a callable to ``strategy``.
343
+
344
+ To improve your chances of finding a global minimum use higher `popsize`
345
+ values, with higher `mutation` and (dithering), but lower `recombination`
346
+ values. This has the effect of widening the search radius, but slowing
347
+ convergence.
348
+
349
+ By default the best solution vector is updated continuously within a single
350
+ iteration (``updating='immediate'``). This is a modification [4]_ of the
351
+ original differential evolution algorithm which can lead to faster
352
+ convergence as trial vectors can immediately benefit from improved
353
+ solutions. To use the original Storn and Price behaviour, updating the best
354
+ solution once per iteration, set ``updating='deferred'``.
355
+ The ``'deferred'`` approach is compatible with both parallelization and
356
+ vectorization (``'workers'`` and ``'vectorized'`` keywords). These may
357
+ improve minimization speed by using computer resources more efficiently.
358
+ The ``'workers'`` distribute calculations over multiple processors. By
359
+ default the Python `multiprocessing` module is used, but other approaches
360
+ are also possible, such as the Message Passing Interface (MPI) used on
361
+ clusters [6]_ [7]_. The overhead from these approaches (creating new
362
+ Processes, etc) may be significant, meaning that computational speed
363
+ doesn't necessarily scale with the number of processors used.
364
+ Parallelization is best suited to computationally expensive objective
365
+ functions. If the objective function is less expensive, then
366
+ ``'vectorized'`` may aid by only calling the objective function once per
367
+ iteration, rather than multiple times for all the population members; the
368
+ interpreter overhead is reduced.
369
+
370
+ .. versionadded:: 0.15.0
371
+
372
+ References
373
+ ----------
374
+ .. [1] Differential evolution, Wikipedia,
375
+ http://en.wikipedia.org/wiki/Differential_evolution
376
+ .. [2] Storn, R and Price, K, Differential Evolution - a Simple and
377
+ Efficient Heuristic for Global Optimization over Continuous Spaces,
378
+ Journal of Global Optimization, 1997, 11, 341 - 359.
379
+ .. [3] Qiang, J., Mitchell, C., A Unified Differential Evolution Algorithm
380
+ for Global Optimization, 2014, https://www.osti.gov/servlets/purl/1163659
381
+ .. [4] Wormington, M., Panaccione, C., Matney, K. M., Bowen, D. K.,
382
+ Characterization of structures from X-ray scattering data using
383
+ genetic algorithms, Phil. Trans. R. Soc. Lond. A, 1999, 357,
384
+ 2827-2848
385
+ .. [5] Lampinen, J., A constraint handling approach for the differential
386
+ evolution algorithm. Proceedings of the 2002 Congress on
387
+ Evolutionary Computation. CEC'02 (Cat. No. 02TH8600). Vol. 2. IEEE,
388
+ 2002.
389
+ .. [6] https://mpi4py.readthedocs.io/en/stable/
390
+ .. [7] https://schwimmbad.readthedocs.io/en/latest/
391
+
392
+
393
+ Examples
394
+ --------
395
+ Let us consider the problem of minimizing the Rosenbrock function. This
396
+ function is implemented in `rosen` in `scipy.optimize`.
397
+
398
+ >>> import numpy as np
399
+ >>> from scipy.optimize import rosen, differential_evolution
400
+ >>> bounds = [(0,2), (0, 2), (0, 2), (0, 2), (0, 2)]
401
+ >>> result = differential_evolution(rosen, bounds)
402
+ >>> result.x, result.fun
403
+ (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)
404
+
405
+ Now repeat, but with parallelization.
406
+
407
+ >>> result = differential_evolution(rosen, bounds, updating='deferred',
408
+ ... workers=2)
409
+ >>> result.x, result.fun
410
+ (array([1., 1., 1., 1., 1.]), 1.9216496320061384e-19)
411
+
412
+ Let's do a constrained minimization.
413
+
414
+ >>> from scipy.optimize import LinearConstraint, Bounds
415
+
416
+ We add the constraint that the sum of ``x[0]`` and ``x[1]`` must be less
417
+ than or equal to 1.9. This is a linear constraint, which may be written
418
+ ``A @ x <= 1.9``, where ``A = array([[1, 1]])``. This can be encoded as
419
+ a `LinearConstraint` instance:
420
+
421
+ >>> lc = LinearConstraint([[1, 1]], -np.inf, 1.9)
422
+
423
+ Specify limits using a `Bounds` object.
424
+
425
+ >>> bounds = Bounds([0., 0.], [2., 2.])
426
+ >>> result = differential_evolution(rosen, bounds, constraints=lc,
427
+ ... seed=1)
428
+ >>> result.x, result.fun
429
+ (array([0.96632622, 0.93367155]), 0.0011352416852625719)
430
+
431
+ Next find the minimum of the Ackley function
432
+ (https://en.wikipedia.org/wiki/Test_functions_for_optimization).
433
+
434
+ >>> def ackley(x):
435
+ ... arg1 = -0.2 * np.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2))
436
+ ... arg2 = 0.5 * (np.cos(2. * np.pi * x[0]) + np.cos(2. * np.pi * x[1]))
437
+ ... return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e
438
+ >>> bounds = [(-5, 5), (-5, 5)]
439
+ >>> result = differential_evolution(ackley, bounds, seed=1)
440
+ >>> result.x, result.fun
441
+ (array([0., 0.]), 4.440892098500626e-16)
442
+
443
+ The Ackley function is written in a vectorized manner, so the
444
+ ``'vectorized'`` keyword can be employed. Note the reduced number of
445
+ function evaluations.
446
+
447
+ >>> result = differential_evolution(
448
+ ... ackley, bounds, vectorized=True, updating='deferred', seed=1
449
+ ... )
450
+ >>> result.x, result.fun
451
+ (array([0., 0.]), 4.440892098500626e-16)
452
+
453
+ The following custom strategy function mimics 'best1bin':
454
+
455
+ >>> def custom_strategy_fn(candidate, population, rng=None):
456
+ ...     parameter_count = population.shape[-1]
457
+ ... mutation, recombination = 0.7, 0.9
458
+ ... trial = np.copy(population[candidate])
459
+ ... fill_point = rng.choice(parameter_count)
460
+ ...
461
+ ... pool = np.arange(len(population))
462
+ ... rng.shuffle(pool)
463
+ ...
464
+ ...     # select two distinct random indices, neither equal
465
+ ...     # to candidate.
466
+ ... idxs = []
467
+ ... while len(idxs) < 2 and len(pool) > 0:
468
+ ... idx = pool[0]
469
+ ... pool = pool[1:]
470
+ ... if idx != candidate:
471
+ ... idxs.append(idx)
472
+ ...
473
+ ... r0, r1 = idxs[:2]
474
+ ...
475
+ ... bprime = (population[0] + mutation *
476
+ ... (population[r0] - population[r1]))
477
+ ...
478
+ ... crossovers = rng.uniform(size=parameter_count)
479
+ ... crossovers = crossovers < recombination
480
+ ... crossovers[fill_point] = True
481
+ ... trial = np.where(crossovers, bprime, trial)
482
+ ... return trial
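+
+ The customized strategy can then be supplied in place of a string (using
+ the ``ackley`` function and ``bounds`` defined above):
+
+ >>> result = differential_evolution(ackley, bounds,
+ ...                                 strategy=custom_strategy_fn, seed=1)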
483
+
484
+ """
485
+
486
+ # using a context manager means that any created Pool objects are
487
+ # cleared up.
488
+ with DifferentialEvolutionSolver(func, bounds, args=args,
489
+ strategy=strategy,
490
+ maxiter=maxiter,
491
+ popsize=popsize, tol=tol,
492
+ mutation=mutation,
493
+ recombination=recombination,
494
+ seed=seed, polish=polish,
495
+ callback=callback,
496
+ disp=disp, init=init, atol=atol,
497
+ updating=updating,
498
+ workers=workers,
499
+ constraints=constraints,
500
+ x0=x0,
501
+ integrality=integrality,
502
+ vectorized=vectorized) as solver:
503
+ ret = solver.solve()
504
+
505
+ return ret
506
+
507
+
508
+ class DifferentialEvolutionSolver:
509
+
510
+ """This class implements the differential evolution solver
511
+
512
+ Parameters
513
+ ----------
514
+ func : callable
515
+ The objective function to be minimized. Must be in the form
516
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
517
+ and ``args`` is a tuple of any additional fixed parameters needed to
518
+ completely specify the function. The number of parameters, N, is equal
519
+ to ``len(x)``.
520
+ bounds : sequence or `Bounds`
521
+ Bounds for variables. There are two ways to specify the bounds:
522
+
523
+ 1. Instance of `Bounds` class.
524
+ 2. ``(min, max)`` pairs for each element in ``x``, defining the
525
+ finite lower and upper bounds for the optimizing argument of
526
+ `func`.
527
+
528
+ The total number of bounds is used to determine the number of
529
+ parameters, N. If there are parameters whose bounds are equal the total
530
+ number of free parameters is ``N - N_equal``.
531
+ args : tuple, optional
532
+ Any additional fixed parameters needed to
533
+ completely specify the objective function.
534
+ strategy : {str, callable}, optional
535
+ The differential evolution strategy to use. Should be one of:
536
+
537
+ - 'best1bin'
538
+ - 'best1exp'
539
+ - 'rand1bin'
540
+ - 'rand1exp'
541
+ - 'rand2bin'
542
+ - 'rand2exp'
543
+ - 'randtobest1bin'
544
+ - 'randtobest1exp'
545
+ - 'currenttobest1bin'
546
+ - 'currenttobest1exp'
547
+ - 'best2exp'
548
+ - 'best2bin'
549
+
550
+ The default is 'best1bin'. Strategies that may be
551
+ implemented are outlined in 'Notes'.
552
+
553
+ Alternatively the differential evolution strategy can be customized
554
+ by providing a callable that constructs a trial vector. The callable
555
+ must have the form
556
+ ``strategy(candidate: int, population: np.ndarray, rng=None)``,
557
+ where ``candidate`` is an integer specifying which entry of the
558
+ population is being evolved, ``population`` is an array of shape
559
+ ``(S, N)`` containing all the population members (where S is the
560
+ total population size), and ``rng`` is the random number generator
561
+ being used within the solver.
562
+ ``candidate`` will be in the range ``[0, S)``.
563
+ ``strategy`` must return a trial vector with shape `(N,)`. The
564
+ fitness of this trial vector is compared against the fitness of
565
+ ``population[candidate]``.
566
+ maxiter : int, optional
567
+ The maximum number of generations over which the entire population is
568
+ evolved. The maximum number of function evaluations (with no polishing)
569
+ is: ``(maxiter + 1) * popsize * (N - N_equal)``
570
+ popsize : int, optional
571
+ A multiplier for setting the total population size. The population has
572
+ ``popsize * (N - N_equal)`` individuals. This keyword is overridden if
573
+ an initial population is supplied via the `init` keyword. When using
574
+ ``init='sobol'`` the population size is calculated as the next power
575
+ of 2 after ``popsize * (N - N_equal)``.
576
+ tol : float, optional
577
+ Relative tolerance for convergence, the solving stops when
578
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
579
+ where `atol` and `tol` are the absolute and relative tolerance
580
+ respectively.
581
+ mutation : float or tuple(float, float), optional
582
+ The mutation constant. In the literature this is also known as
583
+ differential weight, being denoted by F.
584
+ If specified as a float it should be in the range [0, 2].
585
+ If specified as a tuple ``(min, max)`` dithering is employed. Dithering
586
+ randomly changes the mutation constant on a generation by generation
587
+ basis. The mutation constant for that generation is taken from
588
+ U[min, max). Dithering can help speed convergence significantly.
589
+ Increasing the mutation constant increases the search radius, but will
590
+ slow down convergence.
591
+ recombination : float, optional
592
+ The recombination constant, should be in the range [0, 1]. In the
593
+ literature this is also known as the crossover probability, being
594
+ denoted by CR. Increasing this value allows a larger number of mutants
595
+ to progress into the next generation, but at the risk of population
596
+ stability.
597
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
598
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
599
+ singleton is used.
600
+ If `seed` is an int, a new ``RandomState`` instance is used,
601
+ seeded with `seed`.
602
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
603
+ that instance is used.
604
+ Specify `seed` for repeatable minimizations.
605
+ disp : bool, optional
606
+ Prints the evaluated `func` at every iteration.
607
+ callback : callable, optional
608
+ A callable called after each iteration. Has the signature:
609
+
610
+ ``callback(intermediate_result: OptimizeResult)``
611
+
612
+ where ``intermediate_result`` is a keyword parameter containing an
613
+ `OptimizeResult` with attributes ``x`` and ``fun``, the best solution
614
+ found so far and the objective function. Note that the name
615
+ of the parameter must be ``intermediate_result`` for the callback
616
+ to be passed an `OptimizeResult`.
617
+
618
+ The callback also supports a signature like:
619
+
620
+ ``callback(x, convergence: float=val)``
621
+
622
+ ``val`` represents the fractional value of the population convergence.
623
+ When ``val`` is greater than ``1.0``, the function halts.
624
+
625
+ Introspection is used to determine which of the signatures is invoked.
626
+
627
+ Global minimization will halt if the callback raises ``StopIteration``
628
+ or returns ``True``; any polishing is still carried out.
629
+
630
+ .. versionchanged:: 1.12.0
631
+ callback accepts the ``intermediate_result`` keyword.
632
+
633
+ polish : bool, optional
634
+ If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B`
635
+ method is used to polish the best population member at the end, which
636
+ can improve the minimization slightly. If a constrained problem is
637
+ being studied then the `trust-constr` method is used instead. For large
638
+ problems with many constraints, polishing can take a long time due to
639
+ the Jacobian computations.
640
+ maxfun : int, optional
641
+ Set the maximum number of function evaluations. However, it probably
642
+ makes more sense to set `maxiter` instead.
643
+ init : str or array-like, optional
644
+ Specify which type of population initialization is performed. Should be
645
+ one of:
646
+
647
+ - 'latinhypercube'
648
+ - 'sobol'
649
+ - 'halton'
650
+ - 'random'
651
+ - array specifying the initial population. The array should have
652
+ shape ``(S, N)``, where S is the total population size and
653
+ N is the number of parameters.
654
+ `init` is clipped to `bounds` before use.
655
+
656
+ The default is 'latinhypercube'. Latin Hypercube sampling tries to
657
+ maximize coverage of the available parameter space.
658
+
659
+ 'sobol' and 'halton' are superior alternatives that maximize coverage of
660
+ the parameter space. 'sobol' will enforce an initial population
661
+ size which is calculated as the next power of 2 after
662
+ ``popsize * (N - N_equal)``. 'halton' has no requirements but is a bit
663
+ less efficient. See `scipy.stats.qmc` for more details.
664
+
665
+ 'random' initializes the population randomly - this has the drawback
666
+ that clustering can occur, preventing the whole of parameter space
667
+ being covered. Use of an array to specify a population could be used,
668
+ for example, to create a tight bunch of initial guesses in a location
669
+ where the solution is known to exist, thereby reducing time for
670
+ convergence.
671
+ atol : float, optional
672
+ Absolute tolerance for convergence, the solving stops when
673
+ ``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``,
674
+ where `atol` and `tol` are the absolute and relative tolerance
675
+ respectively.
676
+ updating : {'immediate', 'deferred'}, optional
677
+ If ``'immediate'``, the best solution vector is continuously updated
678
+ within a single generation [4]_. This can lead to faster convergence as
679
+ trial vectors can take advantage of continuous improvements in the best
680
+ solution.
681
+ With ``'deferred'``, the best solution vector is updated once per
682
+ generation. Only ``'deferred'`` is compatible with parallelization or
683
+ vectorization, and the `workers` and `vectorized` keywords can
684
+ over-ride this option.
685
+ workers : int or map-like callable, optional
686
+ If `workers` is an int the population is subdivided into `workers`
687
+ sections and evaluated in parallel
688
+ (uses `multiprocessing.Pool <multiprocessing>`).
689
+ Supply `-1` to use all cores available to the Process.
690
+ Alternatively supply a map-like callable, such as
691
+ `multiprocessing.Pool.map` for evaluating the population in parallel.
692
+ This evaluation is carried out as ``workers(func, iterable)``.
693
+ This option will override the `updating` keyword to
694
+ `updating='deferred'` if `workers != 1`.
695
+ Requires that `func` be pickleable.
696
+ constraints : {NonlinearConstraint, LinearConstraint, Bounds}
697
+ Constraints on the solver, over and above those applied by the `bounds`
698
+ kwd. Uses the approach by Lampinen.
699
+ x0 : None or array-like, optional
700
+ Provides an initial guess to the minimization. Once the population has
701
+ been initialized this vector replaces the first (best) member. This
702
+ replacement is done even if `init` is given an initial population.
703
+ ``x0.shape == (N,)``.
704
+ integrality : 1-D array, optional
705
+ For each decision variable, a boolean value indicating whether the
706
+ decision variable is constrained to integer values. The array is
707
+ broadcast to ``(N,)``.
708
+ If any decision variables are constrained to be integral, they will not
709
+ be changed during polishing.
710
+ Only integer values lying between the lower and upper bounds are used.
711
+ If there are no integer values lying between the bounds then a
712
+ `ValueError` is raised.
713
+ vectorized : bool, optional
714
+ If ``vectorized is True``, `func` is sent an `x` array with
715
+ ``x.shape == (N, S)``, and is expected to return an array of shape
716
+ ``(S,)``, where `S` is the number of solution vectors to be calculated.
717
+ If constraints are applied, each of the functions used to construct
718
+ a `Constraint` object should accept an `x` array with
719
+ ``x.shape == (N, S)``, and return an array of shape ``(M, S)``, where
720
+ `M` is the number of constraint components.
721
+ This option is an alternative to the parallelization offered by
722
+ `workers`, and may help in optimization speed. This keyword is
723
+ ignored if ``workers != 1``.
724
+ This option will override the `updating` keyword to
725
+ ``updating='deferred'``.
726
+ """
727
+
728
+ # Dispatch of mutation strategy method (binomial or exponential).
729
+ _binomial = {'best1bin': '_best1',
730
+ 'randtobest1bin': '_randtobest1',
731
+ 'currenttobest1bin': '_currenttobest1',
732
+ 'best2bin': '_best2',
733
+ 'rand2bin': '_rand2',
734
+ 'rand1bin': '_rand1'}
735
+ _exponential = {'best1exp': '_best1',
736
+ 'rand1exp': '_rand1',
737
+ 'randtobest1exp': '_randtobest1',
738
+ 'currenttobest1exp': '_currenttobest1',
739
+ 'best2exp': '_best2',
740
+ 'rand2exp': '_rand2'}
741
+
742
+ __init_error_msg = ("The population initialization method must be one of "
743
+ "'latinhypercube', 'sobol', 'halton' or 'random', or an "
744
+ "array of shape (S, N) where N is the number of "
+ "parameters and S>5")
745
+
746
+ def __init__(self, func, bounds, args=(),
747
+ strategy='best1bin', maxiter=1000, popsize=15,
748
+ tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=None,
749
+ maxfun=np.inf, callback=None, disp=False, polish=True,
750
+ init='latinhypercube', atol=0, updating='immediate',
751
+ workers=1, constraints=(), x0=None, *, integrality=None,
752
+ vectorized=False):
753
+
754
+ if callable(strategy):
755
+ # a callable strategy is going to be stored in self.strategy anyway
756
+ pass
757
+ elif strategy in self._binomial:
758
+ self.mutation_func = getattr(self, self._binomial[strategy])
759
+ elif strategy in self._exponential:
760
+ self.mutation_func = getattr(self, self._exponential[strategy])
761
+ else:
762
+ raise ValueError("Please select a valid mutation strategy")
763
+ self.strategy = strategy
764
+
765
+ self.callback = _wrap_callback(callback, "differential_evolution")
766
+ self.polish = polish
767
+
768
+ # set the updating / parallelisation options
769
+ if updating in ['immediate', 'deferred']:
770
+ self._updating = updating
771
+
772
+ self.vectorized = vectorized
773
+
774
+ # want to use parallelisation, but updating is immediate
775
+ if workers != 1 and updating == 'immediate':
776
+ warnings.warn("differential_evolution: the 'workers' keyword has"
777
+ " overridden updating='immediate' to"
778
+ " updating='deferred'", UserWarning, stacklevel=2)
779
+ self._updating = 'deferred'
780
+
781
+ if vectorized and workers != 1:
782
+ warnings.warn("differential_evolution: the 'workers' keyword"
783
+ " overrides the 'vectorized' keyword", stacklevel=2)
784
+ self.vectorized = vectorized = False
785
+
786
+ if vectorized and updating == 'immediate':
787
+ warnings.warn("differential_evolution: the 'vectorized' keyword"
788
+ " has overridden updating='immediate' to updating"
789
+ "='deferred'", UserWarning, stacklevel=2)
790
+ self._updating = 'deferred'
791
+
792
+ # an object with a map method.
793
+ if vectorized:
794
+ def maplike_for_vectorized_func(func, x):
795
+ # send an array (N, S) to the user func,
796
+ # expect to receive (S,). Transposition is required because
797
+ # internally the population is held as (S, N)
798
+ return np.atleast_1d(func(x.T))
799
+ workers = maplike_for_vectorized_func
800
+
801
+ self._mapwrapper = MapWrapper(workers)
802
+
803
+ # relative and absolute tolerances for convergence
804
+ self.tol, self.atol = tol, atol
805
+
806
+ # Mutation constant should be in [0, 2). If specified as a sequence
807
+ # then dithering is performed.
808
+ self.scale = mutation
809
+ if (not np.all(np.isfinite(mutation)) or
810
+ np.any(np.array(mutation) >= 2) or
811
+ np.any(np.array(mutation) < 0)):
812
+ raise ValueError('The mutation constant must be a float in '
813
+ 'U[0, 2), or specified as a tuple(min, max)'
814
+ ' where min < max and min, max are in U[0, 2).')
815
+
816
+ self.dither = None
817
+ if hasattr(mutation, '__iter__') and len(mutation) > 1:
818
+ self.dither = [mutation[0], mutation[1]]
819
+ self.dither.sort()
820
+
821
+ self.cross_over_probability = recombination
822
+
823
+ # we create a wrapped function to allow the use of map (and Pool.map
824
+ # in the future)
825
+ self.func = _FunctionWrapper(func, args)
826
+ self.args = args
827
+
828
+ # convert tuple of lower and upper bounds to limits
829
+ # [(low_0, high_0), ..., (low_n, high_n)]
830
+ # -> [[low_0, ..., low_n], [high_0, ..., high_n]]
831
+ if isinstance(bounds, Bounds):
832
+ self.limits = np.array(new_bounds_to_old(bounds.lb,
833
+ bounds.ub,
834
+ len(bounds.lb)),
835
+ dtype=float).T
836
+ else:
837
+ self.limits = np.array(bounds, dtype='float').T
838
+
839
+ if (np.size(self.limits, 0) != 2 or not
840
+ np.all(np.isfinite(self.limits))):
841
+ raise ValueError('bounds should be a sequence containing finite '
842
+ 'real valued (min, max) pairs for each value'
843
+ ' in x')
844
+
845
+ if maxiter is None: # the default used to be None
846
+ maxiter = 1000
847
+ self.maxiter = maxiter
848
+ if maxfun is None: # the default used to be None
849
+ maxfun = np.inf
850
+ self.maxfun = maxfun
851
+
852
+ # population is scaled to between [0, 1].
853
+ # We have to scale between parameter <-> population
854
+ # save these arguments for _scale_parameters and
855
+ # _unscale_parameters. This is an optimization
856
+ self.__scale_arg1 = 0.5 * (self.limits[0] + self.limits[1])
857
+ self.__scale_arg2 = np.fabs(self.limits[0] - self.limits[1])
858
+ with np.errstate(divide='ignore'):
859
+ # if lb == ub then the following line will be 1/0, which is why
860
+ # we ignore the divide by zero warning. The result from 1/0 is
861
+ # inf, so replace those values by 0.
862
+ self.__recip_scale_arg2 = 1 / self.__scale_arg2
863
+ self.__recip_scale_arg2[~np.isfinite(self.__recip_scale_arg2)] = 0
864
+
865
+ self.parameter_count = np.size(self.limits, 1)
866
+
867
+ self.random_number_generator = check_random_state(seed)
868
+
869
+ # Which parameters are going to be integers?
870
+ if np.any(integrality):
871
+ # user has provided a truth value for integer constraints
872
+ integrality = np.broadcast_to(
873
+ integrality,
874
+ self.parameter_count
875
+ )
876
+ integrality = np.asarray(integrality, bool)
877
+ # For integrality parameters change the limits to only allow
878
+ # integer values lying between the limits.
879
+ lb, ub = np.copy(self.limits)
880
+
881
+ lb = np.ceil(lb)
882
+ ub = np.floor(ub)
883
+ if not (lb[integrality] <= ub[integrality]).all():
884
+ # there's a parameter that doesn't have an integer value
885
+ # lying between the limits
886
+ raise ValueError("One of the integrality constraints does not"
887
+ " have any possible integer values between"
888
+ " the lower/upper bounds.")
889
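+ # widening each limit by just under 0.5 (np.nextafter keeps the
+ # interval open) lets the rounding in _scale_parameters reach lb and
+ # ub exactly, without ever rounding past them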
+ nlb = np.nextafter(lb[integrality] - 0.5, np.inf)
890
+ nub = np.nextafter(ub[integrality] + 0.5, -np.inf)
891
+
892
+ self.integrality = integrality
893
+ self.limits[0, self.integrality] = nlb
894
+ self.limits[1, self.integrality] = nub
895
+ else:
896
+ self.integrality = False
897
+
898
+ # check for equal bounds
899
+ eb = self.limits[0] == self.limits[1]
900
+ eb_count = np.count_nonzero(eb)
901
+
902
+ # default population initialization is a latin hypercube design, but
903
+ # there are other population initializations possible.
904
+ # the minimum is 5 because 'best2bin' requires a population that's at
905
+ # least 5 long
906
+ # 202301 - reduced population size to account for parameters with
907
+ # equal bounds. If there are no varying parameters set N to at least 1
908
+ self.num_population_members = max(
909
+ 5,
910
+ popsize * max(1, self.parameter_count - eb_count)
911
+ )
912
+ self.population_shape = (self.num_population_members,
913
+ self.parameter_count)
914
+
915
+ self._nfev = 0
916
+ # check str first, otherwise comparing a str with an array will fail
917
+ if isinstance(init, str):
918
+ if init == 'latinhypercube':
919
+ self.init_population_lhs()
920
+ elif init == 'sobol':
921
+ # must be Ns = 2**m for Sobol'
922
+ n_s = int(2 ** np.ceil(np.log2(self.num_population_members)))
923
+ self.num_population_members = n_s
924
+ self.population_shape = (self.num_population_members,
925
+ self.parameter_count)
926
+ self.init_population_qmc(qmc_engine='sobol')
927
+ elif init == 'halton':
928
+ self.init_population_qmc(qmc_engine='halton')
929
+ elif init == 'random':
930
+ self.init_population_random()
931
+ else:
932
+ raise ValueError(self.__init_error_msg)
933
+ else:
934
+ self.init_population_array(init)
935
+
936
+ if x0 is not None:
937
+ # scale to within unit interval and
938
+ # ensure parameters are within bounds.
939
+ x0_scaled = self._unscale_parameters(np.asarray(x0))
940
+ if ((x0_scaled > 1.0) | (x0_scaled < 0.0)).any():
941
+ raise ValueError(
942
+ "Some entries in x0 lay outside the specified bounds"
943
+ )
944
+ self.population[0] = x0_scaled
945
+
946
+ # infrastructure for constraints
947
+ self.constraints = constraints
948
+ self._wrapped_constraints = []
949
+
950
+ if hasattr(constraints, '__len__'):
951
+ # a sequence of constraints; this also handles the default
952
+ # keyword parameter (an empty tuple)
953
+ for c in constraints:
954
+ self._wrapped_constraints.append(
955
+ _ConstraintWrapper(c, self.x)
956
+ )
957
+ else:
958
+ self._wrapped_constraints = [
959
+ _ConstraintWrapper(constraints, self.x)
960
+ ]
961
+ self.total_constraints = np.sum(
962
+ [c.num_constr for c in self._wrapped_constraints]
963
+ )
964
+ self.constraint_violation = np.zeros((self.num_population_members, 1))
965
+ self.feasible = np.ones(self.num_population_members, bool)
966
+
967
+ # an array to shuffle when selecting candidates. Create it here
968
+ # rather than repeatedly creating it in _select_samples.
969
+ self._random_population_index = np.arange(self.num_population_members)
970
+ self.disp = disp
971
+
972
+ def init_population_lhs(self):
973
+ """
974
+ Initializes the population with Latin Hypercube Sampling.
975
+ Latin Hypercube Sampling ensures that each parameter is uniformly
976
+ sampled over its range.
977
+ """
978
+ rng = self.random_number_generator
979
+
980
+ # Each parameter range needs to be sampled uniformly. The scaled
981
+ # parameter range ([0, 1)) needs to be split into
982
+ # `self.num_population_members` segments, each of which has the following
983
+ # size:
984
+ segsize = 1.0 / self.num_population_members
985
+
986
+ # Within each segment we sample from a uniform random distribution.
987
+ # We need to do this sampling for each parameter.
988
+ samples = (segsize * rng.uniform(size=self.population_shape)
989
+
990
+ # Offset each segment to cover the entire parameter range [0, 1)
991
+ + np.linspace(0., 1., self.num_population_members,
992
+ endpoint=False)[:, np.newaxis])
993
+
994
+ # Create an array for population of candidate solutions.
995
+ self.population = np.zeros_like(samples)
996
+
997
+ # Initialize population of candidate solutions by permutation of the
998
+ # random samples.
999
+ for j in range(self.parameter_count):
1000
+ order = rng.permutation(range(self.num_population_members))
1001
+ self.population[:, j] = samples[order, j]
1002
+
1003
+ # reset population energies
1004
+ self.population_energies = np.full(self.num_population_members,
1005
+ np.inf)
1006
+
1007
+ # reset number of function evaluations counter
1008
+ self._nfev = 0
1009
+
1010
+ def init_population_qmc(self, qmc_engine):
1011
+ """Initializes the population with a QMC method.
1012
+
1013
+ QMC methods ensure that each parameter is uniformly
1014
+ sampled over its range.
1015
+
1016
+ Parameters
1017
+ ----------
1018
+ qmc_engine : str
1019
+ The QMC method to use for initialization. Can be one of
1020
+ ``latinhypercube``, ``sobol`` or ``halton``.
1021
+
1022
+ """
1023
+ from scipy.stats import qmc
1024
+
1025
+ rng = self.random_number_generator
1026
+
1027
+ # Create an array for population of candidate solutions.
1028
+ if qmc_engine == 'latinhypercube':
1029
+ sampler = qmc.LatinHypercube(d=self.parameter_count, seed=rng)
1030
+ elif qmc_engine == 'sobol':
1031
+ sampler = qmc.Sobol(d=self.parameter_count, seed=rng)
1032
+ elif qmc_engine == 'halton':
1033
+ sampler = qmc.Halton(d=self.parameter_count, seed=rng)
1034
+ else:
1035
+ raise ValueError(self.__init_error_msg)
1036
+
1037
+ self.population = sampler.random(n=self.num_population_members)
1038
+
1039
+ # reset population energies
1040
+ self.population_energies = np.full(self.num_population_members,
1041
+ np.inf)
1042
+
1043
+ # reset number of function evaluations counter
1044
+ self._nfev = 0
1045
+
1046
+ def init_population_random(self):
1047
+ """
1048
+ Initializes the population at random. This type of initialization
1049
+ can possess clustering; Latin Hypercube sampling is generally better.
1050
+ """
1051
+ rng = self.random_number_generator
1052
+ self.population = rng.uniform(size=self.population_shape)
1053
+
1054
+ # reset population energies
1055
+ self.population_energies = np.full(self.num_population_members,
1056
+ np.inf)
1057
+
1058
+ # reset number of function evaluations counter
1059
+ self._nfev = 0
1060
+
1061
+ def init_population_array(self, init):
1062
+ """
1063
+ Initializes the population with a user specified population.
1064
+
1065
+ Parameters
1066
+ ----------
1067
+ init : np.ndarray
1068
+ Array specifying a subset of the initial population. The array should
1069
+ have shape (S, N), where N is the number of parameters.
1070
+ The population is clipped to the lower and upper bounds.
1071
+ """
1072
+ # make sure you're using a float array
1073
+ popn = np.asarray(init, dtype=np.float64)
1074
+
1075
+ if (np.size(popn, 0) < 5 or
1076
+ popn.shape[1] != self.parameter_count or
1077
+ len(popn.shape) != 2):
1078
+ raise ValueError("The population supplied needs to have shape"
1079
+ " (S, len(x)), where S > 4.")
1080
+
1081
+ # scale values and clip to bounds, assigning to population
1082
+ self.population = np.clip(self._unscale_parameters(popn), 0, 1)
1083
+
1084
+ self.num_population_members = np.size(self.population, 0)
1085
+
1086
+ self.population_shape = (self.num_population_members,
1087
+ self.parameter_count)
1088
+
1089
+ # reset population energies
1090
+ self.population_energies = np.full(self.num_population_members,
1091
+ np.inf)
1092
+
1093
+ # reset number of function evaluations counter
1094
+ self._nfev = 0
1095
+
1096
+ @property
1097
+ def x(self):
1098
+ """
1099
+ The best solution from the solver
1100
+ """
1101
+ return self._scale_parameters(self.population[0])
1102
+
1103
+ @property
1104
+ def convergence(self):
1105
+ """
1106
+ The standard deviation of the population energies divided by their
1107
+ mean.
1108
+ """
1109
+ if np.any(np.isinf(self.population_energies)):
1110
+ return np.inf
1111
+ return (np.std(self.population_energies) /
1112
+ (np.abs(np.mean(self.population_energies)) + _MACHEPS))
1113
+
1114
+ def converged(self):
1115
+ """
1116
+ Return True if the solver has converged.
1117
+ """
1118
+ if np.any(np.isinf(self.population_energies)):
1119
+ return False
1120
+
1121
+ return (np.std(self.population_energies) <=
1122
+ self.atol +
1123
+ self.tol * np.abs(np.mean(self.population_energies)))
1124
+
1125
+ def solve(self):
1126
+ """
1127
+ Runs the DifferentialEvolutionSolver.
1128
+
1129
+ Returns
1130
+ -------
1131
+ res : OptimizeResult
1132
+ The optimization result represented as an `OptimizeResult` object.
1133
+ Important attributes are: ``x`` the solution array, ``success`` a
1134
+ Boolean flag indicating if the optimizer exited successfully,
1135
+ ``message`` which describes the cause of the termination,
1136
+ ``population`` the solution vectors present in the population, and
1137
+ ``population_energies`` the value of the objective function for
1138
+ each entry in ``population``.
1139
+ See `OptimizeResult` for a description of other attributes. If
1140
+ `polish` was employed, and a lower minimum was obtained by the
1141
+ polishing, then OptimizeResult also contains the ``jac`` attribute.
1142
+ If the eventual solution does not satisfy the applied constraints
1143
+ ``success`` will be `False`.
1144
+ """
1145
+ nit, warning_flag = 0, False
1146
+ status_message = _status_message['success']
1147
+
1148
+ # The population may have just been initialized (all entries are
1149
+ # np.inf). If it has you have to calculate the initial energies.
1150
+ # Although this is also done in the evolve generator it's possible
1151
+ # that someone can set maxiter=0, at which point we still want the
1152
+ # initial energies to be calculated (the following loop isn't run).
1153
+ if np.all(np.isinf(self.population_energies)):
1154
+ self.feasible, self.constraint_violation = (
1155
+ self._calculate_population_feasibilities(self.population))
1156
+
1157
+ # only work out population energies for feasible solutions
1158
+ self.population_energies[self.feasible] = (
1159
+ self._calculate_population_energies(
1160
+ self.population[self.feasible]))
1161
+
1162
+ self._promote_lowest_energy()
1163
+
1164
+ # do the optimization.
1165
+ for nit in range(1, self.maxiter + 1):
1166
+ # evolve the population by a generation
1167
+ try:
1168
+ next(self)
1169
+ except StopIteration:
1170
+ warning_flag = True
1171
+ if self._nfev > self.maxfun:
1172
+ status_message = _status_message['maxfev']
1173
+ elif self._nfev == self.maxfun:
1174
+ status_message = ('Maximum number of function evaluations'
1175
+ ' has been reached.')
1176
+ break
1177
+
1178
+ if self.disp:
1179
+ print(f"differential_evolution step {nit}: f(x)="
1180
+ f" {self.population_energies[0]}"
1181
+ )
1182
+
1183
+ if self.callback:
1184
+ c = self.tol / (self.convergence + _MACHEPS)
1185
+ res = self._result(nit=nit, message="in progress")
1186
+ res.convergence = c
1187
+ try:
1188
+ warning_flag = bool(self.callback(res))
1189
+ except StopIteration:
1190
+ warning_flag = True
1191
+
1192
+ if warning_flag:
1193
+ status_message = 'callback function requested stop early'
1194
+
1195
+ # should the solver terminate?
1196
+ if warning_flag or self.converged():
1197
+ break
1198
+
1199
+ else:
1200
+ status_message = _status_message['maxiter']
1201
+ warning_flag = True
1202
+
1203
+ DE_result = self._result(
1204
+ nit=nit, message=status_message, warning_flag=warning_flag
1205
+ )
1206
+
1207
+ if self.polish and not np.all(self.integrality):
1208
+ # can't polish if all the parameters are integers
1209
+ if np.any(self.integrality):
1210
+ # set the lower/upper bounds equal so that any integrality
1211
+ # constraints work.
1212
+ limits, integrality = self.limits, self.integrality
1213
+ limits[0, integrality] = DE_result.x[integrality]
1214
+ limits[1, integrality] = DE_result.x[integrality]
1215
+
1216
+ polish_method = 'L-BFGS-B'
1217
+
1218
+ if self._wrapped_constraints:
1219
+ polish_method = 'trust-constr'
1220
+
1221
+ constr_violation = self._constraint_violation_fn(DE_result.x)
1222
+ if np.any(constr_violation > 0.):
1223
+ warnings.warn("differential evolution didn't find a "
1224
+ "solution satisfying the constraints, "
1225
+ "attempting to polish from the least "
1226
+ "infeasible solution",
1227
+ UserWarning, stacklevel=2)
1228
+ if self.disp:
1229
+ print(f"Polishing solution with '{polish_method}'")
1230
+ result = minimize(self.func,
1231
+ np.copy(DE_result.x),
1232
+ method=polish_method,
1233
+ bounds=self.limits.T,
1234
+ constraints=self.constraints)
1235
+
1236
+ self._nfev += result.nfev
1237
+ DE_result.nfev = self._nfev
1238
+
1239
+ # The polished solution is only accepted if there is an improvement in
1240
+ # the cost function, the polishing was successful, and the solution lies
1241
+ # within the bounds.
1242
+ if (result.fun < DE_result.fun and
1243
+ result.success and
1244
+ np.all(result.x <= self.limits[1]) and
1245
+ np.all(self.limits[0] <= result.x)):
1246
+ DE_result.fun = result.fun
1247
+ DE_result.x = result.x
1248
+ DE_result.jac = result.jac
1249
+ # to keep internal state consistent
1250
+ self.population_energies[0] = result.fun
1251
+ self.population[0] = self._unscale_parameters(result.x)
1252
+
1253
+ if self._wrapped_constraints:
1254
+ DE_result.constr = [c.violation(DE_result.x) for
1255
+ c in self._wrapped_constraints]
1256
+ DE_result.constr_violation = np.max(
1257
+ np.concatenate(DE_result.constr))
1258
+ DE_result.maxcv = DE_result.constr_violation
1259
+ if DE_result.maxcv > 0:
1260
+ # if the result is infeasible then success must be False
1261
+ DE_result.success = False
1262
+ DE_result.message = ("The solution does not satisfy the "
1263
+ f"constraints, MAXCV = {DE_result.maxcv}")
1264
+
1265
+ return DE_result
1266
+
1267
+ def _result(self, **kwds):
1268
+ # form an intermediate OptimizeResult
1269
+ nit = kwds.get('nit', None)
1270
+ message = kwds.get('message', None)
1271
+ warning_flag = kwds.get('warning_flag', False)
1272
+ result = OptimizeResult(
1273
+ x=self.x,
1274
+ fun=self.population_energies[0],
1275
+ nfev=self._nfev,
1276
+ nit=nit,
1277
+ message=message,
1278
+ success=(warning_flag is not True),
1279
+ population=self._scale_parameters(self.population),
1280
+ population_energies=self.population_energies
1281
+ )
1282
+ if self._wrapped_constraints:
1283
+ result.constr = [c.violation(result.x)
1284
+ for c in self._wrapped_constraints]
1285
+ result.constr_violation = np.max(np.concatenate(result.constr))
1286
+ result.maxcv = result.constr_violation
1287
+ if result.maxcv > 0:
1288
+ result.success = False
1289
+
1290
+ return result
1291
+
1292
+ def _calculate_population_energies(self, population):
1293
+ """
1294
+ Calculate the energies of a population.
1295
+
1296
+ Parameters
1297
+ ----------
1298
+ population : ndarray
1299
+ An array of parameter vectors normalised to [0, 1] using lower
1300
+ and upper limits. Has shape ``(np.size(population, 0), N)``.
1301
+
1302
+ Returns
1303
+ -------
1304
+ energies : ndarray
1305
+ An array of energies corresponding to each population member. If
1306
+ maxfun will be exceeded during this call, then the number of
1307
+ function evaluations will be reduced and energies will be
1308
+ right-padded with np.inf. Has shape ``(np.size(population, 0),)``
1309
+ """
1310
+ num_members = np.size(population, 0)
1311
+ # S is the number of function evals left to stay under the
1312
+ # maxfun budget
1313
+ S = min(num_members, self.maxfun - self._nfev)
1314
+
1315
+ energies = np.full(num_members, np.inf)
1316
+
1317
+ parameters_pop = self._scale_parameters(population)
1318
+ try:
1319
+ calc_energies = list(
1320
+ self._mapwrapper(self.func, parameters_pop[0:S])
1321
+ )
1322
+ calc_energies = np.squeeze(calc_energies)
1323
+ except (TypeError, ValueError) as e:
1324
+ # wrong number of arguments for _mapwrapper
1325
+ # or wrong length returned from the mapper
1326
+ raise RuntimeError(
1327
+ "The map-like callable must be of the form f(func, iterable), "
1328
+ "returning a sequence of numbers the same length as 'iterable'"
1329
+ ) from e
1330
+
1331
+ if calc_energies.size != S:
1332
+ if self.vectorized:
1333
+ raise RuntimeError("The vectorized function must return an"
1334
+ " array of shape (S,) when given an array"
1335
+ " of shape (len(x), S)")
1336
+ raise RuntimeError("func(x, *args) must return a scalar value")
1337
+
1338
+ energies[0:S] = calc_energies
1339
+
1340
+ if self.vectorized:
1341
+ self._nfev += 1
1342
+ else:
1343
+ self._nfev += S
1344
+
1345
+ return energies
1346
+
1347
+ def _promote_lowest_energy(self):
1348
+ # swaps 'best solution' into first population entry
1349
+
1350
+ idx = np.arange(self.num_population_members)
1351
+ feasible_solutions = idx[self.feasible]
1352
+ if feasible_solutions.size:
1353
+ # find the best feasible solution
1354
+ idx_t = np.argmin(self.population_energies[feasible_solutions])
1355
+ l = feasible_solutions[idx_t]
1356
+ else:
1357
+ # no solution was feasible, use 'best' infeasible solution, which
1358
+ # will violate constraints the least
1359
+ l = np.argmin(np.sum(self.constraint_violation, axis=1))
1360
+
1361
+ self.population_energies[[0, l]] = self.population_energies[[l, 0]]
1362
+ self.population[[0, l], :] = self.population[[l, 0], :]
1363
+ self.feasible[[0, l]] = self.feasible[[l, 0]]
1364
+ self.constraint_violation[[0, l], :] = (
1365
+ self.constraint_violation[[l, 0], :])
1366
+
1367
+ def _constraint_violation_fn(self, x):
1368
+ """
1369
+ Calculates total constraint violation for all the constraints, for a
1370
+ set of solutions.
1371
+
1372
+ Parameters
1373
+ ----------
1374
+ x : ndarray
1375
+ Solution vector(s). Has shape (S, N), or (N,), where S is the
1376
+ number of solutions to investigate and N is the number of
1377
+ parameters.
1378
+
1379
+ Returns
1380
+ -------
1381
+ cv : ndarray
1382
+ Total violation of constraints. Has shape ``(S, M)``, where M is
1383
+ the total number of constraint components (which is not necessarily
1384
+ equal to len(self._wrapped_constraints)).
1385
+ """
1386
+ # how many solution vectors you're calculating constraint violations
1387
+ # for
1388
+ S = np.size(x) // self.parameter_count
1389
+ _out = np.zeros((S, self.total_constraints))
1390
+ offset = 0
1391
+ for con in self._wrapped_constraints:
1392
+ # the input/output of the (vectorized) constraint function is
1393
+ # {(N, S), (N,)} --> (M, S)
1394
+ # The input to _constraint_violation_fn is (S, N) or (N,), so
1395
+ # transpose to pass it to the constraint. The output is transposed
1396
+ # from (M, S) to (S, M) for further use.
1397
+ c = con.violation(x.T).T
1398
+
1399
+ # The shape of c should be (M,), (1, M), or (S, M). Check for
1400
+ # those shapes, as an incorrect shape indicates that the
1401
+ # user constraint function didn't return the right thing, and
1402
+ # the reshape operation will fail. Intercept the wrong shape
1403
+ # to give a reasonable error message. I'm not sure what failure
1404
+ # modes an inventive user will come up with.
1405
+ if c.shape[-1] != con.num_constr or (S > 1 and c.shape[0] != S):
1406
+ raise RuntimeError("An array returned from a Constraint has"
1407
+ " the wrong shape. If `vectorized is False`"
1408
+ " the Constraint should return an array of"
1409
+ " shape (M,). If `vectorized is True` then"
1410
+ " the Constraint must return an array of"
1411
+ " shape (M, S), where S is the number of"
1412
+ " solution vectors and M is the number of"
1413
+ " constraint components in a given"
1414
+ " Constraint object.")
1415
+
1416
+ # the violation function may return a 1D array, but is it a
1417
+ # sequence of constraints for one solution (S=1, M>=1), or the
1418
+ # value of a single constraint for a sequence of solutions
1419
+ # (S>=1, M=1)
1420
+ c = np.reshape(c, (S, con.num_constr))
1421
+ _out[:, offset:offset + con.num_constr] = c
1422
+ offset += con.num_constr
1423
+
1424
+ return _out
1425
+
1426
+ def _calculate_population_feasibilities(self, population):
1427
+ """
1428
+ Calculate the feasibilities of a population.
1429
+
1430
+ Parameters
1431
+ ----------
1432
+ population : ndarray
1433
+ An array of parameter vectors normalised to [0, 1] using lower
1434
+ and upper limits. Has shape ``(np.size(population, 0), N)``.
1435
+
1436
+ Returns
1437
+ -------
1438
+ feasible, constraint_violation : ndarray, ndarray
1439
+ Boolean array of feasibility for each population member, and an
1440
+ array of the constraint violation for each population member.
1441
+ constraint_violation has shape ``(np.size(population, 0), M)``,
1442
+ where M is the number of constraints.
1443
+ """
1444
+ num_members = np.size(population, 0)
1445
+ if not self._wrapped_constraints:
1446
+ # shortcut for no constraints
1447
+ return np.ones(num_members, bool), np.zeros((num_members, 1))
1448
+
1449
+ # (S, N)
1450
+ parameters_pop = self._scale_parameters(population)
1451
+
1452
+ if self.vectorized:
1453
+ # (S, M)
1454
+ constraint_violation = np.array(
1455
+ self._constraint_violation_fn(parameters_pop)
1456
+ )
1457
+ else:
1458
+ # (S, 1, M)
1459
+ constraint_violation = np.array([self._constraint_violation_fn(x)
1460
+ for x in parameters_pop])
1461
+ # if you use the list comprehension in the line above it will
1462
+ # create an array of shape (S, 1, M), because each iteration
1463
+ # generates an array of (1, M). In comparison the vectorized
1464
+ # version returns (S, M). It's therefore necessary to remove axis 1
1465
+ constraint_violation = constraint_violation[:, 0]
1466
+
1467
+ feasible = ~(np.sum(constraint_violation, axis=1) > 0)
1468
+
1469
+ return feasible, constraint_violation
1470
+
1471
+ def __iter__(self):
1472
+ return self
1473
+
1474
+ def __enter__(self):
1475
+ return self
1476
+
1477
+ def __exit__(self, *args):
1478
+ return self._mapwrapper.__exit__(*args)
1479
+
1480
+ def _accept_trial(self, energy_trial, feasible_trial, cv_trial,
1481
+ energy_orig, feasible_orig, cv_orig):
1482
+ """
1483
+ Trial is accepted if:
1484
+ * it satisfies all constraints and provides a lower or equal objective
1485
+ function value, while both the compared solutions are feasible
1486
+ - or -
1487
+ * it is feasible while the original solution is infeasible,
1488
+ - or -
1489
+ * it is infeasible, but provides a lower or equal constraint violation
1490
+ for all constraint functions.
1491
+
1492
+ This test corresponds to section III of Lampinen [1]_.
1493
+
1494
+ Parameters
1495
+ ----------
1496
+ energy_trial : float
1497
+ Energy of the trial solution
1498
+ feasible_trial : bool
1499
+ Feasibility of trial solution
1500
+ cv_trial : array-like
1501
+ Excess constraint violation for the trial solution
1502
+ energy_orig : float
1503
+ Energy of the original solution
1504
+ feasible_orig : bool
1505
+ Feasibility of original solution
1506
+ cv_orig : array-like
1507
+ Excess constraint violation for the original solution
1508
+
1509
+ Returns
1510
+ -------
1511
+ accepted : bool
1512
+
1513
+ """
1514
+ if feasible_orig and feasible_trial:
1515
+ return energy_trial <= energy_orig
1516
+ elif feasible_trial and not feasible_orig:
1517
+ return True
1518
+ elif not feasible_trial and (cv_trial <= cv_orig).all():
1519
+ # cv_trial < cv_orig would imply that both trial and orig are not
1520
+ # feasible
1521
+ return True
1522
+
1523
+ return False
1524
+
1525
+ def __next__(self):
1526
+ """
1527
+ Evolve the population by a single generation
1528
+
1529
+ Returns
1530
+ -------
1531
+ x : ndarray
1532
+ The best solution from the solver.
1533
+ fun : float
1534
+ Value of objective function obtained from the best solution.
1535
+ """
1536
+ # the population may have just been initialized (all entries are
1537
+ # np.inf). If it has you have to calculate the initial energies
1538
+ if np.all(np.isinf(self.population_energies)):
1539
+ self.feasible, self.constraint_violation = (
1540
+ self._calculate_population_feasibilities(self.population))
1541
+
1542
+ # only need to work out population energies for those that are
1543
+ # feasible
1544
+ self.population_energies[self.feasible] = (
1545
+ self._calculate_population_energies(
1546
+ self.population[self.feasible]))
1547
+
1548
+ self._promote_lowest_energy()
1549
+
1550
+ if self.dither is not None:
1551
+ self.scale = self.random_number_generator.uniform(self.dither[0],
1552
+ self.dither[1])
1553
+
1554
+ if self._updating == 'immediate':
1555
+ # update best solution immediately
1556
+ for candidate in range(self.num_population_members):
1557
+ if self._nfev > self.maxfun:
1558
+ raise StopIteration
1559
+
1560
+ # create a trial solution
1561
+ trial = self._mutate(candidate)
1562
+
1563
+ # ensuring that it's in the range [0, 1)
1564
+ self._ensure_constraint(trial)
1565
+
1566
+ # scale from [0, 1) to the actual parameter value
1567
+ parameters = self._scale_parameters(trial)
1568
+
1569
+ # determine the energy of the objective function
1570
+ if self._wrapped_constraints:
1571
+ cv = self._constraint_violation_fn(parameters)
1572
+ feasible = False
1573
+ energy = np.inf
1574
+ if not np.sum(cv) > 0:
1575
+ # solution is feasible
1576
+ feasible = True
1577
+ energy = self.func(parameters)
1578
+ self._nfev += 1
1579
+ else:
1580
+ feasible = True
1581
+ cv = np.atleast_2d([0.])
1582
+ energy = self.func(parameters)
1583
+ self._nfev += 1
1584
+
1585
+ # compare trial and population member
1586
+ if self._accept_trial(energy, feasible, cv,
1587
+ self.population_energies[candidate],
1588
+ self.feasible[candidate],
1589
+ self.constraint_violation[candidate]):
1590
+ self.population[candidate] = trial
1591
+ self.population_energies[candidate] = np.squeeze(energy)
1592
+ self.feasible[candidate] = feasible
1593
+ self.constraint_violation[candidate] = cv
1594
+
1595
+ # if the trial candidate is also better than the best
1596
+ # solution then promote it.
1597
+ if self._accept_trial(energy, feasible, cv,
1598
+ self.population_energies[0],
1599
+ self.feasible[0],
1600
+ self.constraint_violation[0]):
1601
+ self._promote_lowest_energy()
1602
+
1603
+ elif self._updating == 'deferred':
1604
+ # update best solution once per generation
1605
+ if self._nfev >= self.maxfun:
1606
+ raise StopIteration
1607
+
1608
+ # 'deferred' approach, vectorised form.
1609
+ # create trial solutions
1610
+ trial_pop = self._mutate_many(
1611
+ np.arange(self.num_population_members)
1612
+ )
1613
+
1614
+ # enforce bounds
1615
+ self._ensure_constraint(trial_pop)
1616
+
1617
+ # determine the energies of the objective function, but only for
1618
+ # feasible trials
1619
+ feasible, cv = self._calculate_population_feasibilities(trial_pop)
1620
+ trial_energies = np.full(self.num_population_members, np.inf)
1621
+
1622
+ # only calculate for feasible entries
1623
+ trial_energies[feasible] = self._calculate_population_energies(
1624
+ trial_pop[feasible])
1625
+
1626
+ # which solutions are 'improved'?
1627
+ loc = [self._accept_trial(*val) for val in
1628
+ zip(trial_energies, feasible, cv, self.population_energies,
1629
+ self.feasible, self.constraint_violation)]
1630
+ loc = np.array(loc)
1631
+ self.population = np.where(loc[:, np.newaxis],
1632
+ trial_pop,
1633
+ self.population)
1634
+ self.population_energies = np.where(loc,
1635
+ trial_energies,
1636
+ self.population_energies)
1637
+ self.feasible = np.where(loc,
1638
+ feasible,
1639
+ self.feasible)
1640
+ self.constraint_violation = np.where(loc[:, np.newaxis],
1641
+ cv,
1642
+ self.constraint_violation)
1643
+
1644
+ # make sure the best solution is updated if updating='deferred'.
1645
+ # put the lowest energy into the best solution position.
1646
+ self._promote_lowest_energy()
1647
+
1648
+ return self.x, self.population_energies[0]
1649
+
1650
+ def _scale_parameters(self, trial):
1651
+ """Scale from a number between 0 and 1 to parameters."""
1652
+ # trial either has shape (N, ) or (L, N), where L is the number of
1653
+ # solutions being scaled
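+ # e.g. (illustrative) for a parameter with limits (-1, 3):
+ # 0.0 -> -1.0, 0.5 -> 1.0, 1.0 -> 3.0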
1654
+ scaled = self.__scale_arg1 + (trial - 0.5) * self.__scale_arg2
1655
+ if np.count_nonzero(self.integrality):
1656
+ i = np.broadcast_to(self.integrality, scaled.shape)
1657
+ scaled[i] = np.round(scaled[i])
1658
+ return scaled
1659
+
1660
+ def _unscale_parameters(self, parameters):
1661
+ """Scale from parameters to a number between 0 and 1."""
1662
+ return (parameters - self.__scale_arg1) * self.__recip_scale_arg2 + 0.5
1663
+
1664
+ def _ensure_constraint(self, trial):
1665
+ """Make sure the parameters lie between the limits."""
1666
+ mask = np.bitwise_or(trial > 1, trial < 0)
1667
+ if oob := np.count_nonzero(mask):
1668
+ trial[mask] = self.random_number_generator.uniform(size=oob)
1669
+
1670
+ def _mutate_custom(self, candidate):
1671
+ rng = self.random_number_generator
1672
+ msg = (
1673
+ "strategy must have signature"
1674
+ " f(candidate: int, population: np.ndarray, rng=None) returning an"
1675
+ " array of shape (N,)"
1676
+ )
1677
+ _population = self._scale_parameters(self.population)
1678
+ if not len(np.shape(candidate)):
1679
+ # single entry in population
1680
+ trial = self.strategy(candidate, _population, rng=rng)
1681
+ if trial.shape != (self.parameter_count,):
1682
+ raise RuntimeError(msg)
1683
+ else:
1684
+ S = candidate.shape[0]
1685
+ trial = np.array(
1686
+ [self.strategy(c, _population, rng=rng) for c in candidate],
1687
+ dtype=float
1688
+ )
1689
+ if trial.shape != (S, self.parameter_count):
1690
+ raise RuntimeError(msg)
1691
+ return self._unscale_parameters(trial)
1692
+
1693
+ def _mutate_many(self, candidates):
1694
+ """Create trial vectors based on a mutation strategy."""
1695
+ rng = self.random_number_generator
1696
+
1697
+ S = len(candidates)
1698
+ if callable(self.strategy):
1699
+ return self._mutate_custom(candidates)
1700
+
1701
+ trial = np.copy(self.population[candidates])
1702
+ samples = np.array([self._select_samples(c, 5) for c in candidates])
1703
+
1704
+ if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
1705
+ bprime = self.mutation_func(candidates, samples)
1706
+ else:
1707
+ bprime = self.mutation_func(samples)
1708
+
1709
+ fill_point = rng_integers(rng, self.parameter_count, size=S)
1710
+ crossovers = rng.uniform(size=(S, self.parameter_count))
1711
+ crossovers = crossovers < self.cross_over_probability
1712
+ if self.strategy in self._binomial:
1713
+ # the last one is always from the bprime vector for binomial
1714
+ # If you fill in modulo with a loop you have to set the last one to
1715
+ # true. If you don't use a loop then you can have any random entry
1716
+ # be True.
1717
+ i = np.arange(S)
1718
+ crossovers[i, fill_point[i]] = True
1719
+ trial = np.where(crossovers, bprime, trial)
1720
+ return trial
1721
+
1722
+ elif self.strategy in self._exponential:
1723
+ crossovers[..., 0] = True
1724
+ for j in range(S):
1725
+ i = 0
1726
+ init_fill = fill_point[j]
1727
+ while (i < self.parameter_count and crossovers[j, i]):
1728
+ trial[j, init_fill] = bprime[j, init_fill]
1729
+ init_fill = (init_fill + 1) % self.parameter_count
1730
+ i += 1
1731
+
1732
+ return trial
1733
+
1734
+ def _mutate(self, candidate):
1735
+ """Create a trial vector based on a mutation strategy."""
1736
+ rng = self.random_number_generator
1737
+
1738
+ if callable(self.strategy):
1739
+ return self._mutate_custom(candidate)
1740
+
1741
+ fill_point = rng_integers(rng, self.parameter_count)
1742
+ samples = self._select_samples(candidate, 5)
1743
+
1744
+ trial = np.copy(self.population[candidate])
1745
+
1746
+ if self.strategy in ['currenttobest1exp', 'currenttobest1bin']:
1747
+ bprime = self.mutation_func(candidate, samples)
1748
+ else:
1749
+ bprime = self.mutation_func(samples)
1750
+
1751
+ crossovers = rng.uniform(size=self.parameter_count)
1752
+ crossovers = crossovers < self.cross_over_probability
1753
+ if self.strategy in self._binomial:
1754
+ # the last one is always from the bprime vector for binomial
1755
+ # If you fill in modulo with a loop you have to set the last one to
1756
+ # true. If you don't use a loop then you can have any random entry
1757
+ # be True.
1758
+ crossovers[fill_point] = True
1759
+ trial = np.where(crossovers, bprime, trial)
1760
+ return trial
1761
+
1762
+ elif self.strategy in self._exponential:
1763
+ i = 0
1764
+ crossovers[0] = True
1765
+ while i < self.parameter_count and crossovers[i]:
1766
+ trial[fill_point] = bprime[fill_point]
1767
+ fill_point = (fill_point + 1) % self.parameter_count
1768
+ i += 1
1769
+
1770
+ return trial
1771
+
1772
+ def _best1(self, samples):
1773
+ """best1bin, best1exp"""
1774
+ # samples.shape == (S, 5)
1775
+ # or
1776
+ # samples.shape == (5,)
1777
+ r0, r1 = samples[..., :2].T
1778
+ return (self.population[0] + self.scale *
1779
+ (self.population[r0] - self.population[r1]))
1780
+
1781
+ def _rand1(self, samples):
1782
+ """rand1bin, rand1exp"""
1783
+ r0, r1, r2 = samples[..., :3].T
1784
+ return (self.population[r0] + self.scale *
1785
+ (self.population[r1] - self.population[r2]))
1786
+
1787
+ def _randtobest1(self, samples):
1788
+ """randtobest1bin, randtobest1exp"""
1789
+ r0, r1, r2 = samples[..., :3].T
1790
+ bprime = np.copy(self.population[r0])
1791
+ bprime += self.scale * (self.population[0] - bprime)
1792
+ bprime += self.scale * (self.population[r1] -
1793
+ self.population[r2])
1794
+ return bprime
1795
+
1796
+ def _currenttobest1(self, candidate, samples):
1797
+ """currenttobest1bin, currenttobest1exp"""
1798
+ r0, r1 = samples[..., :2].T
1799
+ bprime = (self.population[candidate] + self.scale *
1800
+ (self.population[0] - self.population[candidate] +
1801
+ self.population[r0] - self.population[r1]))
1802
+ return bprime
1803
+
1804
+ def _best2(self, samples):
1805
+ """best2bin, best2exp"""
1806
+ r0, r1, r2, r3 = samples[..., :4].T
1807
+ bprime = (self.population[0] + self.scale *
1808
+ (self.population[r0] + self.population[r1] -
1809
+ self.population[r2] - self.population[r3]))
1810
+
1811
+ return bprime
1812
+
1813
+ def _rand2(self, samples):
1814
+ """rand2bin, rand2exp"""
1815
+ r0, r1, r2, r3, r4 = samples[..., :5].T
1816
+ bprime = (self.population[r0] + self.scale *
1817
+ (self.population[r1] + self.population[r2] -
1818
+ self.population[r3] - self.population[r4]))
1819
+
1820
+ return bprime
1821
+
1822
+ def _select_samples(self, candidate, number_samples):
1823
+ """
1824
+ obtain random integers from range(self.num_population_members),
1825
+ without replacement. You can't have the original candidate either.
1826
+ """
1827
+ self.random_number_generator.shuffle(self._random_population_index)
1828
+ idxs = self._random_population_index[:number_samples + 1]
1829
+ return idxs[idxs != candidate][:number_samples]
1830
+
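+ # A minimal standalone sketch (illustration only) of the sampling scheme in
+ # `_select_samples` above: draw distinct population indices by shuffling a
+ # permutation and dropping the current candidate.
+ # import numpy as np
+ # rng = np.random.default_rng(42)
+ # num_members, candidate, k = 10, 3, 5
+ # idx = np.arange(num_members)
+ # rng.shuffle(idx)                    # random permutation of all indices
+ # samples = idx[idx != candidate][:k] # remove the candidate, keep first k
+ # print(samples)                      # k distinct indices, none equal to 3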
1831
+
1832
+ class _ConstraintWrapper:
1833
+ """Object to wrap/evaluate user defined constraints.
1834
+
1835
+ Very similar in practice to `PreparedConstraint`, except that no evaluation
1836
+ of jac/hess is performed (explicit or implicit).
1837
+
1838
+ If created successfully, it will contain the attributes listed below.
1839
+
1840
+ Parameters
1841
+ ----------
1842
+ constraint : {`NonlinearConstraint`, `LinearConstraint`, `Bounds`}
1843
+ Constraint to check and prepare.
1844
+ x0 : array_like
1845
+ Initial vector of independent variables, shape (N,)
1846
+
1847
+ Attributes
1848
+ ----------
1849
+ fun : callable
1850
+ Function defining the constraint wrapped by one of the convenience
1851
+ classes.
1852
+ bounds : 2-tuple
1853
+ Contains lower and upper bounds for the constraints --- lb and ub.
1854
+ These are converted to ndarray and have a size equal to the number of
1855
+ the constraints.
1856
+
1857
+ Notes
1858
+ -----
1859
+ _ConstraintWrapper.fun and _ConstraintWrapper.violation can get sent
1860
+ arrays of shape (N, S) or (N,), where S is the number of vectors of shape
1861
+ (N,) to consider constraints for.
1862
+ """
1863
+ def __init__(self, constraint, x0):
1864
+ self.constraint = constraint
1865
+
1866
+ if isinstance(constraint, NonlinearConstraint):
1867
+ def fun(x):
1868
+ x = np.asarray(x)
1869
+ return np.atleast_1d(constraint.fun(x))
1870
+ elif isinstance(constraint, LinearConstraint):
1871
+ def fun(x):
1872
+ if issparse(constraint.A):
1873
+ A = constraint.A
1874
+ else:
1875
+ A = np.atleast_2d(constraint.A)
1876
+
1877
+ res = A.dot(x)
1878
+ # x either has shape (N, S) or (N)
1879
+ # (M, N) x (N, S) --> (M, S)
1880
+ # (M, N) x (N,) --> (M,)
1881
+ # However, if (M, N) is a matrix then:
1882
+ # (M, N) * (N,) --> (M, 1), we need this to be (M,)
1883
+ if x.ndim == 1 and res.ndim == 2:
1884
+ # deal with case that constraint.A is an np.matrix
1885
+ # see gh20041
1886
+ res = np.asarray(res)[:, 0]
1887
+
1888
+ return res
1889
+ elif isinstance(constraint, Bounds):
1890
+ def fun(x):
1891
+ return np.asarray(x)
1892
+ else:
1893
+ raise ValueError("`constraint` of an unknown type is passed.")
1894
+
1895
+ self.fun = fun
1896
+
1897
+ lb = np.asarray(constraint.lb, dtype=float)
1898
+ ub = np.asarray(constraint.ub, dtype=float)
1899
+
1900
+ x0 = np.asarray(x0)
1901
+
1902
+ # find out the number of constraints
1903
+ f0 = fun(x0)
1904
+ self.num_constr = m = f0.size
1905
+ self.parameter_count = x0.size
1906
+
1907
+ if lb.ndim == 0:
1908
+ lb = np.resize(lb, m)
1909
+ if ub.ndim == 0:
1910
+ ub = np.resize(ub, m)
1911
+
1912
+ self.bounds = (lb, ub)
1913
+
1914
+ def __call__(self, x):
1915
+ return np.atleast_1d(self.fun(x))
1916
+
1917
+ def violation(self, x):
1918
+ """How much the constraint is exceeded by.
1919
+
1920
+ Parameters
1921
+ ----------
1922
+ x : array-like
1923
+ Vector of independent variables, (N, S), where N is number of
1924
+ parameters and S is the number of solutions to be investigated.
1925
+
1926
+ Returns
1927
+ -------
1928
+ excess : array-like
1929
+ How much the constraint is exceeded by, for each of the
1930
+ constraints specified by `_ConstraintWrapper.fun`.
1931
+ Has shape (M, S) where M is the number of constraint components.
1932
+ """
1933
+ # expect ev to have shape (num_constr, S) or (num_constr,)
1934
+ ev = self.fun(np.asarray(x))
1935
+
1936
+ try:
1937
+ excess_lb = np.maximum(self.bounds[0] - ev.T, 0)
1938
+ excess_ub = np.maximum(ev.T - self.bounds[1], 0)
1939
+ except ValueError as e:
1940
+ raise RuntimeError("An array returned from a Constraint has"
1941
+ " the wrong shape. If `vectorized is False`"
1942
+ " the Constraint should return an array of"
1943
+ " shape (M,). If `vectorized is True` then"
1944
+ " the Constraint must return an array of"
1945
+ " shape (M, S), where S is the number of"
1946
+ " solution vectors and M is the number of"
1947
+ " constraint components in a given"
1948
+ " Constraint object.") from e
1949
+
1950
+ v = (excess_lb + excess_ub).T
1951
+ return v
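+ # A minimal usage sketch (illustration only; `_ConstraintWrapper` is private,
+ # and the module path scipy.optimize._differentialevolution is an assumption):
+ # import numpy as np
+ # from scipy.optimize import NonlinearConstraint
+ # from scipy.optimize._differentialevolution import _ConstraintWrapper
+ # nlc = NonlinearConstraint(lambda x: x[0]**2 + x[1], -np.inf, 1.0)
+ # pc = _ConstraintWrapper(nlc, x0=[0.5, 0.5])
+ # print(pc.violation([0.5, 0.5]))   # [0.] -> feasible
+ # print(pc.violation([2.0, 0.0]))   # [3.] -> exceeds ub by 3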
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_differentiate.py ADDED
@@ -0,0 +1,856 @@
1
+ # mypy: disable-error-code="attr-defined"
2
+ import numpy as np
3
+ import scipy._lib._elementwise_iterative_method as eim
4
+ from scipy._lib._util import _RichResult
5
+
6
+ _EERRORINCREASE = -1 # used in _differentiate
7
+
8
+ def _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
9
+ step_factor, step_direction, preserve_shape, callback):
10
+ # Input validation for `_differentiate`
11
+
12
+ if not callable(func):
13
+ raise ValueError('`func` must be callable.')
14
+
15
+ # x has more complex IV that is taken care of during initialization
16
+ x = np.asarray(x)
17
+ dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
18
+
19
+ if not np.iterable(args):
20
+ args = (args,)
21
+
22
+ if atol is None:
23
+ atol = np.finfo(dtype).tiny
24
+
25
+ if rtol is None:
26
+ rtol = np.sqrt(np.finfo(dtype).eps)
27
+
28
+ message = 'Tolerances and step parameters must be non-negative scalars.'
29
+ tols = np.asarray([atol, rtol, initial_step, step_factor])
30
+ if (not np.issubdtype(tols.dtype, np.number)
31
+ or np.any(tols < 0)
32
+ or tols.shape != (4,)):
33
+ raise ValueError(message)
34
+ initial_step, step_factor = tols[2:].astype(dtype)
35
+
36
+ maxiter_int = int(maxiter)
37
+ if maxiter != maxiter_int or maxiter <= 0:
38
+ raise ValueError('`maxiter` must be a positive integer.')
39
+
40
+ order_int = int(order)
41
+ if order_int != order or order <= 0:
42
+ raise ValueError('`order` must be a positive integer.')
43
+
44
+ step_direction = np.sign(step_direction).astype(dtype)
45
+ x, step_direction = np.broadcast_arrays(x, step_direction)
46
+ x, step_direction = x[()], step_direction[()]
47
+
48
+ message = '`preserve_shape` must be True or False.'
49
+ if preserve_shape not in {True, False}:
50
+ raise ValueError(message)
51
+
52
+ if callback is not None and not callable(callback):
53
+ raise ValueError('`callback` must be callable.')
54
+
55
+ return (func, x, args, atol, rtol, maxiter_int, order_int, initial_step,
56
+ step_factor, step_direction, preserve_shape, callback)
57
+
58
+
59
+ def _differentiate(func, x, *, args=(), atol=None, rtol=None, maxiter=10,
60
+ order=8, initial_step=0.5, step_factor=2.0,
61
+ step_direction=0, preserve_shape=False, callback=None):
62
+ """Evaluate the derivative of an elementwise scalar function numerically.
63
+
64
+ Parameters
65
+ ----------
66
+ func : callable
67
+ The function whose derivative is desired. The signature must be::
68
+
69
+ func(x: ndarray, *fargs) -> ndarray
70
+
71
+ where each element of ``x`` is a finite real number and ``fargs`` is a tuple,
72
+ which may contain an arbitrary number of arrays that are broadcastable
73
+ with `x`. ``func`` must be an elementwise function: each element
74
+ ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
75
+ x : array_like
76
+ Abscissae at which to evaluate the derivative.
77
+ args : tuple, optional
78
+ Additional positional arguments to be passed to `func`. Must be arrays
79
+ broadcastable with `x`. If the callable to be differentiated requires
80
+ arguments that are not broadcastable with `x`, wrap that callable with
81
+ `func`. See Examples.
82
+ atol, rtol : float, optional
83
+ Absolute and relative tolerances for the stopping condition: iteration
84
+ will stop when ``res.error < atol + rtol * abs(res.df)``. The default
85
+ `atol` is the smallest normal number of the appropriate dtype, and
86
+ the default `rtol` is the square root of the precision of the
87
+ appropriate dtype.
88
+ order : int, default: 8
89
+ The (positive integer) order of the finite difference formula to be
90
+ used. Odd integers will be rounded up to the next even integer.
91
+ initial_step : float, default: 0.5
92
+ The (absolute) initial step size for the finite difference derivative
93
+ approximation.
94
+ step_factor : float, default: 2.0
95
+ The factor by which the step size is *reduced* in each iteration; i.e.
96
+ the step size in iteration 1 is ``initial_step/step_factor``. If
97
+ ``step_factor < 1``, subsequent steps will be greater than the initial
98
+ step; this may be useful if steps smaller than some threshold are
99
+ undesirable (e.g. due to subtractive cancellation error).
100
+ maxiter : int, default: 10
101
+ The maximum number of iterations of the algorithm to perform. See
102
+ notes.
103
+ step_direction : array_like
104
+ An array representing the direction of the finite difference steps (for
105
+ use when `x` lies near to the boundary of the domain of the function.)
106
+ Must be broadcastable with `x` and all `args`.
107
+ Where 0 (default), central differences are used; where negative (e.g.
108
+ -1), steps are non-positive; and where positive (e.g. 1), all steps are
109
+ non-negative.
110
+ preserve_shape : bool, default: False
111
+ In the following, "arguments of `func`" refers to the array ``x`` and
112
+ any arrays within ``fargs``. Let ``shape`` be the broadcasted shape
113
+ of `x` and all elements of `args` (which is conceptually
114
+ distinct from ``fargs`` passed into `f`).
115
+
116
+ - When ``preserve_shape=False`` (default), `f` must accept arguments
117
+ of *any* broadcastable shapes.
118
+
119
+ - When ``preserve_shape=True``, `f` must accept arguments of shape
120
+ ``shape`` *or* ``shape + (n,)``, where ``(n,)`` is the number of
121
+ abscissae at which the function is being evaluated.
122
+
123
+ In either case, for each scalar element ``xi`` within `x`, the array
124
+ returned by `f` must include the scalar ``f(xi)`` at the same index.
125
+ Consequently, the shape of the output is always the shape of the input
126
+ ``x``.
127
+
128
+ See Examples.
129
+ callback : callable, optional
130
+ An optional user-supplied function to be called before the first
131
+ iteration and after each iteration.
132
+ Called as ``callback(res)``, where ``res`` is a ``_RichResult``
133
+ similar to that returned by `_differentiate` (but containing the
134
+ current iterate's values of all variables). If `callback` raises a
135
+ ``StopIteration``, the algorithm will terminate immediately and
136
+ `_differentiate` will return a result.
137
+
138
+ Returns
139
+ -------
140
+ res : _RichResult
141
+ An instance of `scipy._lib._util._RichResult` with the following
142
+ attributes. (The descriptions are written as though the values will be
143
+ scalars; however, if `func` returns an array, the outputs will be
144
+ arrays of the same shape.)
145
+
146
+ success : bool
147
+ ``True`` when the algorithm terminated successfully (status ``0``).
148
+ status : int
149
+ An integer representing the exit status of the algorithm.
150
+ ``0`` : The algorithm converged to the specified tolerances.
151
+ ``-1`` : The error estimate increased, so iteration was terminated.
152
+ ``-2`` : The maximum number of iterations was reached.
153
+ ``-3`` : A non-finite value was encountered.
154
+ ``-4`` : Iteration was terminated by `callback`.
155
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
156
+ df : float
157
+ The derivative of `func` at `x`, if the algorithm terminated
158
+ successfully.
159
+ error : float
160
+ An estimate of the error: the magnitude of the difference between
161
+ the current estimate of the derivative and the estimate in the
162
+ previous iteration.
163
+ nit : int
164
+ The number of iterations performed.
165
+ nfev : int
166
+ The number of points at which `func` was evaluated.
167
+ x : float
168
+ The value at which the derivative of `func` was evaluated
169
+ (after broadcasting with `args` and `step_direction`).
170
+
171
+ Notes
172
+ -----
173
+ The implementation was inspired by jacobi [1]_, numdifftools [2]_, and
174
+ DERIVEST [3]_, but the implementation follows the theory of Taylor series
175
+ more straightforwardly (and arguably naively so).
176
+ In the first iteration, the derivative is estimated using a finite
177
+ difference formula of order `order` with maximum step size `initial_step`.
178
+ Each subsequent iteration, the maximum step size is reduced by
179
+ `step_factor`, and the derivative is estimated again until a termination
180
+ condition is reached. The error estimate is the magnitude of the difference
181
+ between the current derivative approximation and that of the previous
182
+ iteration.
183
+
184
+ The stencils of the finite difference formulae are designed such that
185
+ abscissae are "nested": after `func` is evaluated at ``order + 1``
186
+ points in the first iteration, `func` is evaluated at only two new points
187
+ in each subsequent iteration; ``order - 1`` previously evaluated function
188
+ values required by the finite difference formula are reused, and two
189
+ function values (evaluations at the points furthest from `x`) are unused.
190
+
191
+ Step sizes are absolute. When the step size is small relative to the
192
+ magnitude of `x`, precision is lost; for example, if `x` is ``1e20``, the
193
+ default initial step size of ``0.5`` cannot be resolved. Accordingly,
194
+ consider using larger initial step sizes for large magnitudes of `x`.
195
+
196
+ The default tolerances are challenging to satisfy at points where the
197
+ true derivative is exactly zero. If the derivative may be exactly zero,
198
+ consider specifying an absolute tolerance (e.g. ``atol=1e-16``) to
199
+ improve convergence.
200
+
201
+ References
202
+ ----------
203
+ .. [1] Hans Dembinski (@HDembinski). jacobi.
+ https://github.com/HDembinski/jacobi
+ .. [2] Per A. Brodtkorb and John D'Errico. numdifftools.
+ https://numdifftools.readthedocs.io/en/latest/
+ .. [3] John D'Errico. DERIVEST: Adaptive Robust Numerical Differentiation.
+ https://www.mathworks.com/matlabcentral/fileexchange/13490-adaptive-robust-numerical-differentiation
+ .. [4] Numerical Differentiation. Wikipedia.
+ https://en.wikipedia.org/wiki/Numerical_differentiation
211
+
212
+ Examples
213
+ --------
214
+ Evaluate the derivative of ``np.exp`` at several points ``x``.
215
+
216
+ >>> import numpy as np
217
+ >>> from scipy.optimize._differentiate import _differentiate
218
+ >>> f = np.exp
219
+ >>> df = np.exp # true derivative
220
+ >>> x = np.linspace(1, 2, 5)
221
+ >>> res = _differentiate(f, x)
222
+ >>> res.df # approximation of the derivative
223
+ array([2.71828183, 3.49034296, 4.48168907, 5.75460268, 7.3890561 ])
224
+ >>> res.error # estimate of the error
225
+ array(
226
+ [7.12940817e-12, 9.16688947e-12, 1.17594823e-11, 1.50972568e-11, 1.93942640e-11]
227
+ )
228
+ >>> abs(res.df - df(x)) # true error
229
+ array(
230
+ [3.06421555e-14, 3.01980663e-14, 5.06261699e-14, 6.30606678e-14, 8.34887715e-14]
231
+ )
232
+
233
+ Show the convergence of the approximation as the step size is reduced.
234
+ Each iteration, the step size is reduced by `step_factor`, so for
235
+ sufficiently small initial step, each iteration reduces the error by a
236
+ factor of ``1/step_factor**order`` until finite precision arithmetic
237
+ inhibits further improvement.
238
+
239
+ >>> iter = list(range(1, 12)) # maximum iterations
240
+ >>> hfac = 2 # step size reduction per iteration
241
+ >>> hdir = [-1, 0, 1] # compare left-, central-, and right- steps
242
+ >>> order = 4 # order of differentiation formula
243
+ >>> x = 1
244
+ >>> ref = df(x)
245
+ >>> errors = [] # true error
246
+ >>> for i in iter:
247
+ ... res = _differentiate(f, x, maxiter=i, step_factor=hfac,
248
+ ... step_direction=hdir, order=order,
249
+ ... atol=0, rtol=0) # prevent early termination
250
+ ... errors.append(abs(res.df - ref))
251
+ >>> errors = np.array(errors)
252
+ >>> plt.semilogy(iter, errors[:, 0], label='left differences')
253
+ >>> plt.semilogy(iter, errors[:, 1], label='central differences')
254
+ >>> plt.semilogy(iter, errors[:, 2], label='right differences')
255
+ >>> plt.xlabel('iteration')
256
+ >>> plt.ylabel('error')
257
+ >>> plt.legend()
258
+ >>> plt.show()
259
+ >>> (errors[1, 1] / errors[0, 1], 1 / hfac**order)
260
+ (0.06215223140159822, 0.0625)
261
+
262
+ The implementation is vectorized over `x`, `step_direction`, and `args`.
263
+ The function is evaluated once before the first iteration to perform input
264
+ validation and standardization, and once per iteration thereafter.
265
+
266
+ >>> def f(x, p):
267
+ ... print('here')
268
+ ... f.nit += 1
269
+ ... return x**p
270
+ >>> f.nit = 0
271
+ >>> def df(x, p):
272
+ ... return p*x**(p-1)
273
+ >>> x = np.arange(1, 5)
274
+ >>> p = np.arange(1, 6).reshape((-1, 1))
275
+ >>> hdir = np.arange(-1, 2).reshape((-1, 1, 1))
276
+ >>> res = _differentiate(f, x, args=(p,), step_direction=hdir, maxiter=1)
277
+ >>> np.allclose(res.df, df(x, p))
278
+ True
279
+ >>> res.df.shape
280
+ (3, 5, 4)
281
+ >>> f.nit
282
+ 2
283
+
284
+ By default, `preserve_shape` is False, and therefore the callable
285
+ `f` may be called with arrays of any broadcastable shapes.
286
+ For example:
287
+
288
+ >>> shapes = []
289
+ >>> def f(x, c):
290
+ ... shape = np.broadcast_shapes(x.shape, c.shape)
291
+ ... shapes.append(shape)
292
+ ... return np.sin(c*x)
293
+ >>>
294
+ >>> c = [1, 5, 10, 20]
295
+ >>> res = _differentiate(f, 0, args=(c,))
296
+ >>> shapes
297
+ [(4,), (4, 8), (4, 2), (3, 2), (2, 2), (1, 2)]
298
+
299
+ To understand where these shapes are coming from - and to better
300
+ understand how `_differentiate` computes accurate results - note that
301
+ higher values of ``c`` correspond with higher frequency sinusoids.
302
+ The higher frequency sinusoids make the function's derivative change
303
+ faster, so more function evaluations are required to achieve the target
304
+ accuracy:
305
+
306
+ >>> res.nfev
307
+ array([11, 13, 15, 17])
308
+
309
+ The initial ``shape``, ``(4,)``, corresponds with evaluating the
310
+ function at a single abscissa and all four frequencies; this is used
311
+ for input validation and to determine the size and dtype of the arrays
312
+ that store results. The next shape corresponds with evaluating the
313
+ function at an initial grid of abscissae and all four frequencies.
314
+ Successive calls to the function evaluate the function at two more
315
+ abscissae, increasing the effective order of the approximation by two.
316
+ However, in later function evaluations, the function is evaluated at
317
+ fewer frequencies because the corresponding derivative has already
318
+ converged to the required tolerance. This saves function evaluations to
319
+ improve performance, but it requires the function to accept arguments of
320
+ any shape.
321
+
322
+ "Vector-valued" functions are unlikely to satisfy this requirement.
323
+ For example, consider
324
+
325
+ >>> def f(x):
326
+ ... return [x, np.sin(3*x), x+np.sin(10*x), np.sin(20*x)*(x-1)**2]
327
+
328
+ This function is not compatible with `_differentiate` as written; for instance,
329
+ the shape of the output will not be the same as the shape of ``x``. Such a
330
+ function *could* be converted to a compatible form with the introduction of
331
+ additional parameters, but this would be inconvenient. In such cases,
332
+ a simpler solution would be to use `preserve_shape`.
333
+
334
+ >>> shapes = []
335
+ >>> def f(x):
336
+ ... shapes.append(x.shape)
337
+ ... x0, x1, x2, x3 = x
338
+ ... return [x0, np.sin(3*x1), x2+np.sin(10*x2), np.sin(20*x3)*(x3-1)**2]
339
+ >>>
340
+ >>> x = np.zeros(4)
341
+ >>> res = _differentiate(f, x, preserve_shape=True)
342
+ >>> shapes
343
+ [(4,), (4, 8), (4, 2), (4, 2), (4, 2), (4, 2)]
344
+
345
+ Here, the shape of ``x`` is ``(4,)``. With ``preserve_shape=True``, the
346
+ function may be called with argument ``x`` of shape ``(4,)`` or ``(4, n)``,
347
+ and this is what we observe.
348
+
349
+ """
350
+ # TODO (followup):
351
+ # - investigate behavior at saddle points
352
+ # - array initial_step / step_factor?
353
+ # - multivariate functions?
354
+
355
+ res = _differentiate_iv(func, x, args, atol, rtol, maxiter, order, initial_step,
356
+ step_factor, step_direction, preserve_shape, callback)
357
+ (func, x, args, atol, rtol, maxiter, order,
358
+ h0, fac, hdir, preserve_shape, callback) = res
359
+
360
+ # Initialization
361
+ # Since f(x) (no step) is not needed for central differences, it may be
362
+ # possible to eliminate this function evaluation. However, it's useful for
363
+ # input validation and standardization, and everything else is designed to
364
+ # reduce function calls, so let's keep it simple.
365
+ temp = eim._initialize(func, (x,), args, preserve_shape=preserve_shape)
366
+ func, xs, fs, args, shape, dtype, xp = temp
367
+ x, f = xs[0], fs[0]
368
+ df = np.full_like(f, np.nan)
369
+ # Ideally we'd broadcast the shape of `hdir` in `_elementwise_algo_init`, but
370
+ # it's simpler to do it here than to generalize `_elementwise_algo_init` further.
371
+ # `hdir` and `x` are already broadcasted in `_differentiate_iv`, so we know
372
+ # that `hdir` can be broadcasted to the final shape.
373
+ hdir = np.broadcast_to(hdir, shape).flatten()
374
+
375
+ status = np.full_like(x, eim._EINPROGRESS, dtype=int) # in progress
376
+ nit, nfev = 0, 1 # one function evaluation performed above
377
+ # Boolean indices of left, central, right, and (all) one-sided steps
378
+ il = hdir < 0
379
+ ic = hdir == 0
380
+ ir = hdir > 0
381
+ io = il | ir
382
+
383
+ # Most of these attributes are reasonably obvious, but:
384
+ # - `fs` holds all the function values of all active `x`. The zeroth
385
+ # axis corresponds with active points `x`, the first axis corresponds
386
+ # with the different steps (in the order described in
387
+ # `_differentiate_weights`).
388
+ # - `terms` (which could probably use a better name) is half the `order`,
389
+ # which is always even.
390
+ work = _RichResult(x=x, df=df, fs=f[:, np.newaxis], error=np.nan, h=h0,
391
+ df_last=np.nan, error_last=np.nan, h0=h0, fac=fac,
392
+ atol=atol, rtol=rtol, nit=nit, nfev=nfev,
393
+ status=status, dtype=dtype, terms=(order+1)//2,
394
+ hdir=hdir, il=il, ic=ic, ir=ir, io=io)
395
+ # This is the correspondence between terms in the `work` object and the
396
+ # final result. In this case, the mapping is trivial. Note that `success`
397
+ # is prepended automatically.
398
+ res_work_pairs = [('status', 'status'), ('df', 'df'), ('error', 'error'),
399
+ ('nit', 'nit'), ('nfev', 'nfev'), ('x', 'x')]
400
+
401
+ def pre_func_eval(work):
402
+ """Determine the abscissae at which the function needs to be evaluated.
403
+
404
+ See `_differentiate_weights` for a description of the stencil (pattern
405
+ of the abscissae).
406
+
407
+ In the first iteration, there is only one stored function value in
408
+ `work.fs`, `f(x)`, so we need to evaluate at `order` new points. In
409
+ subsequent iterations, we evaluate at two new points. Note that
410
+ `work.x` is always flattened into a 1D array after broadcasting with
411
+ all `args`, so we add a new axis at the end and evaluate all points
412
+ in one call to the function.
413
+
414
+ For improvement:
415
+ - Consider measuring the step size actually taken, since `(x + h) - x`
416
+ is not identically equal to `h` with floating point arithmetic.
417
+ - Adjust the step size automatically if `x` is too big to resolve the
418
+ step.
419
+ - We could probably save some work if there are no central difference
420
+ steps or no one-sided steps.
421
+ """
422
+ n = work.terms # half the order
423
+ h = work.h # step size
424
+ c = work.fac # step reduction factor
425
+ d = c**0.5 # square root of step reduction factor (one-sided stencil)
426
+ # Note - no need to be careful about dtypes until we allocate `x_eval`
427
+
428
+ if work.nit == 0:
429
+ hc = h / c**np.arange(n)
430
+ hc = np.concatenate((-hc[::-1], hc))
431
+ else:
432
+ hc = np.asarray([-h, h]) / c**(n-1)
433
+
434
+ if work.nit == 0:
435
+ hr = h / d**np.arange(2*n)
436
+ else:
437
+ hr = np.asarray([h, h/d]) / c**(n-1)
438
+
439
+ n_new = 2*n if work.nit == 0 else 2 # number of new abscissae
440
+ x_eval = np.zeros((len(work.hdir), n_new), dtype=work.dtype)
441
+ il, ic, ir = work.il, work.ic, work.ir
442
+ x_eval[ir] = work.x[ir, np.newaxis] + hr
443
+ x_eval[ic] = work.x[ic, np.newaxis] + hc
444
+ x_eval[il] = work.x[il, np.newaxis] - hr
445
+ return x_eval
446
+
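+ # A standalone sketch (illustration only) of the first-iteration central
+ # stencil built in `pre_func_eval` above, assuming order 8 (n = 4),
+ # h = 0.5, and fac = 2:
+ # import numpy as np
+ # n, h, c = 4, 0.5, 2.0
+ # hc = h / c**np.arange(n)             # [0.5, 0.25, 0.125, 0.0625]
+ # hc = np.concatenate((-hc[::-1], hc)) # symmetric about x; x itself excluded
+ # print(hc)  # [-0.0625 -0.125 -0.25 -0.5  0.5  0.25  0.125  0.0625]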
447
+ def post_func_eval(x, f, work):
448
+ """ Estimate the derivative and error from the function evaluations
449
+
450
+ As in `pre_func_eval`: in the first iteration, there is only one stored
451
+ function value in `work.fs`, `f(x)`, so we need to add the `order` new
452
+ points. In subsequent iterations, we add two new points. The tricky
453
+ part is getting the order to match that of the weights, which is
454
+ described in `_differentiate_weights`.
455
+
456
+ For improvement:
457
+ - Change the order of the weights (and steps in `pre_func_eval`) to
458
+ simplify `work_fc` concatenation and eliminate `fc` concatenation.
459
+ - It would be simple to do one-step Richardson extrapolation with `df`
460
+ and `df_last` to increase the order of the estimate and/or improve
461
+ the error estimate.
462
+ - Process the function evaluations in a more numerically favorable
463
+ way. For instance, combining the pairs of central difference evals
464
+ into a second-order approximation and using Richardson extrapolation
465
+ to produce a higher order approximation seemed to retain accuracy up
466
+ to very high order.
467
+ - Alternatively, we could use `polyfit` like Jacobi. An advantage of
468
+ fitting polynomial to more points than necessary is improved noise
469
+ tolerance.
470
+ """
471
+ n = work.terms
472
+ n_new = n if work.nit == 0 else 1
473
+ il, ic, io = work.il, work.ic, work.io
474
+
475
+ # Central difference
476
+ # `work_fc` is *all* the points at which the function has been evaluated
477
+ # `fc` is the points we're using *this iteration* to produce the estimate
478
+ work_fc = (f[ic, :n_new], work.fs[ic, :], f[ic, -n_new:])
479
+ work_fc = np.concatenate(work_fc, axis=-1)
480
+ if work.nit == 0:
481
+ fc = work_fc
482
+ else:
483
+ fc = (work_fc[:, :n], work_fc[:, n:n+1], work_fc[:, -n:])
484
+ fc = np.concatenate(fc, axis=-1)
485
+
486
+ # One-sided difference
487
+ work_fo = np.concatenate((work.fs[io, :], f[io, :]), axis=-1)
488
+ if work.nit == 0:
489
+ fo = work_fo
490
+ else:
491
+ fo = np.concatenate((work_fo[:, 0:1], work_fo[:, -2*n:]), axis=-1)
492
+
493
+ work.fs = np.zeros((len(ic), work.fs.shape[-1] + 2*n_new))
494
+ work.fs[ic] = work_fc
495
+ work.fs[io] = work_fo
496
+
497
+ wc, wo = _differentiate_weights(work, n)
498
+ work.df_last = work.df.copy()
499
+ work.df[ic] = fc @ wc / work.h
500
+ work.df[io] = fo @ wo / work.h
501
+ work.df[il] *= -1
502
+
503
+ work.h /= work.fac
504
+ work.error_last = work.error
505
+ # Simple error estimate - the difference in derivative estimates between
506
+ # this iteration and the last. This is typically conservative because if
507
+ # convergence has begun, the true error is much closer to the difference
508
+ # between the current estimate and the *next* error estimate. However,
509
+ # we could use Richardson extrapolation to produce an error estimate that
510
+ # is one order higher, and take the difference between that and
511
+ # `work.df` (which would just be a constant factor that depends on `fac`).
512
+ work.error = abs(work.df - work.df_last)
513
+
514
+ def check_termination(work):
515
+ """Terminate due to convergence, non-finite values, or error increase"""
516
+ stop = np.zeros_like(work.df).astype(bool)
517
+
518
+ i = work.error < work.atol + work.rtol*abs(work.df)
519
+ work.status[i] = eim._ECONVERGED
520
+ stop[i] = True
521
+
522
+ if work.nit > 0:
523
+ i = ~((np.isfinite(work.x) & np.isfinite(work.df)) | stop)
524
+ work.df[i], work.status[i] = np.nan, eim._EVALUEERR
525
+ stop[i] = True
526
+
527
+ # With infinite precision, there is a step size below which
528
+ # all smaller step sizes will reduce the error. But in floating point
529
+ # arithmetic, catastrophic cancellation will begin to cause the error
530
+ # to increase again. This heuristic tries to avoid step sizes that are
531
+ # too small. There may be more theoretically sound approaches for
532
+ # detecting a step size that minimizes the total error, but this
533
+ # heuristic seems simple and effective.
534
+ i = (work.error > work.error_last*10) & ~stop
535
+ work.status[i] = _EERRORINCREASE
536
+ stop[i] = True
537
+
538
+ return stop
539
+
540
+ def post_termination_check(work):
541
+ return
542
+
543
+ def customize_result(res, shape):
544
+ return shape
545
+
546
+ return eim._loop(work, callback, shape, maxiter, func, args, dtype,
547
+ pre_func_eval, post_func_eval, check_termination,
548
+ post_termination_check, customize_result, res_work_pairs,
549
+ xp, preserve_shape)
550
+
551
+
552
+ def _differentiate_weights(work, n):
553
+ # This produces the weights of the finite difference formula for a given
554
+ # stencil. In experiments, use of a second-order central difference formula
555
+ # with Richardson extrapolation was more accurate numerically, but it was
556
+ # more complicated, and it would have become even more complicated when
557
+ # adding support for one-sided differences. However, now that all the
558
+ # function evaluation values are stored, they can be processed in whatever
559
+ # way is desired to produce the derivative estimate. We leave alternative
560
+ # approaches to future work. To be more self-contained, here is the theory
561
+ # for deriving the weights below.
562
+ #
563
+ # Recall that the Taylor expansion of a univariate, scalar-valued function
564
+ # about a point `x` may be expressed as:
565
+ # f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
566
+ # Suppose we evaluate f(x), f(x+h), and f(x-h). We have:
567
+ # f(x) = f(x)
568
+ # f(x + h) = f(x) + f'(x)*h + f''(x)/2!*h**2 + O(h**3)
569
+ # f(x - h) = f(x) - f'(x)*h + f''(x)/2!*h**2 + O(h**3)
570
+ # We can solve for weights `wi` such that:
571
+ # w1*f(x) = w1*(f(x))
572
+ # + w2*f(x + h) = w2*(f(x) + f'(x)*h + f''(x)/2!*h**2) + O(h**3)
573
+ # + w3*f(x - h) = w3*(f(x) - f'(x)*h + f''(x)/2!*h**2) + O(h**3)
574
+ # = 0 + f'(x)*h + 0 + O(h**3)
575
+ # Then
576
+ # f'(x) ~ (w1*f(x) + w2*f(x+h) + w3*f(x-h))/h
577
+ # is a finite difference derivative approximation with error O(h**2),
578
+ # and so it is said to be a "second-order" approximation. Under certain
579
+ # conditions (e.g. well-behaved function, `h` sufficiently small), the
580
+ # error in the approximation will decrease with h**2; that is, if `h` is
581
+ # reduced by a factor of 2, the error is reduced by a factor of 4.
582
+ #
583
+ # By default, we use eighth-order formulae. Our central-difference formula
584
+ # uses abscissae:
585
+ # x-h/c**3, x-h/c**2, x-h/c, x-h, x, x+h, x+h/c, x+h/c**2, x+h/c**3
586
+ # where `c` is the step factor. (Typically, the step factor is greater than
587
+ # one, so the outermost points - as written above - are actually closest to
588
+ # `x`.) This "stencil" is chosen so that each iteration, the step can be
589
+ # reduced by the factor `c`, and most of the function evaluations can be
590
+ # reused with the new step size. For example, in the next iteration, we
591
+ # will have:
592
+ # x-h/c**4, x-h/c**3, x-h/c**2, x-h/c, x, x+h/c, x+h/c**2, x+h/c**3, x+h/c**4
593
+ # We do not reuse `x-h` and `x+h` for the new derivative estimate.
594
+ # While this would increase the order of the formula and thus the
595
+ # theoretical convergence rate, it is also less stable numerically.
596
+ # (As noted above, there are other ways of processing the values that are
597
+ # more stable. Thus, even now we store `f(x-h)` and `f(x+h)` in `work.fs`
598
+ # to simplify future development of this sort of improvement.)
599
+ #
600
+ # The (right) one-sided formula is produced similarly using abscissae
601
+ # x, x+h, x+h/d, x+h/d**2, ..., x+h/d**6, x+h/d**7, x+h/d**7
602
+ # where `d` is the square root of `c`. (The left one-sided formula simply
603
+ # uses -h.) When the step size is reduced by factor `c = d**2`, we have
604
+ # abscissae:
605
+ # x, x+h/d**2, x+h/d**3..., x+h/d**8, x+h/d**9, x+h/d**9
606
+ # `d` is chosen as the square root of `c` so that the rate of the step-size
607
+ # reduction is the same per iteration as in the central difference case.
608
+ # Note that because the central difference formulas are inherently of even
609
+ # order, for simplicity, we use only even-order formulas for one-sided
610
+ # differences, too.
611
+
612
+ # It's possible for the user to specify `fac` in, say, double precision but
613
+ # `x` and `args` in single precision. `fac` gets converted to single
614
+ # precision, but we should always use double precision for the intermediate
615
+ # calculations here to avoid additional error in the weights.
616
+ fac = work.fac.astype(np.float64)
617
+
618
+ # Note that if the user switches back to floating point precision with
619
+ # `x` and `args`, then `fac` will not necessarily equal the (lower
620
+ # precision) cached `_differentiate_weights.fac`, and the weights will
621
+ # need to be recalculated. This could be fixed, but it's late, and of
622
+ # low consequence.
623
+ if fac != _differentiate_weights.fac:
624
+ _differentiate_weights.central = []
625
+ _differentiate_weights.right = []
626
+ _differentiate_weights.fac = fac
627
+
628
+ if len(_differentiate_weights.central) != 2*n + 1:
629
+ # Central difference weights. Consider refactoring this; it could
630
+ # probably be more compact.
631
+ i = np.arange(-n, n + 1)
632
+ p = np.abs(i) - 1. # center point has power `p` -1, but sign `s` is 0
633
+ s = np.sign(i)
634
+
635
+ h = s / fac ** p
636
+ A = np.vander(h, increasing=True).T
637
+ b = np.zeros(2*n + 1)
638
+ b[1] = 1
639
+ weights = np.linalg.solve(A, b)
640
+
641
+ # Enforce identities to improve accuracy
642
+ weights[n] = 0
643
+ for i in range(n):
644
+ weights[-i-1] = -weights[i]
645
+
646
+ # Cache the weights. We only need to calculate them once unless
647
+ # the step factor changes.
648
+ _differentiate_weights.central = weights
649
+
650
+ # One-sided difference weights. The left one-sided weights (with
651
+ # negative steps) are simply the negative of the right one-sided
652
+ # weights, so no need to compute them separately.
653
+ i = np.arange(2*n + 1)
654
+ p = i - 1.
655
+ s = np.sign(i)
656
+
657
+ h = s / np.sqrt(fac) ** p
658
+ A = np.vander(h, increasing=True).T
659
+ b = np.zeros(2 * n + 1)
660
+ b[1] = 1
661
+ weights = np.linalg.solve(A, b)
662
+
663
+ _differentiate_weights.right = weights
664
+
665
+ return (_differentiate_weights.central.astype(work.dtype, copy=False),
666
+ _differentiate_weights.right.astype(work.dtype, copy=False))
667
+ _differentiate_weights.central = []
668
+ _differentiate_weights.right = []
669
+ _differentiate_weights.fac = None
670
+
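+ # A worked sketch (illustration only) of the weight derivation described in
+ # the comment above, for the classic 3-point central difference with
+ # offsets -h, 0, +h, i.e. step multiples s = [-1, 0, 1]:
+ # import numpy as np
+ # s = np.array([-1.0, 0.0, 1.0])
+ # A = np.vander(s, increasing=True).T  # row i holds s**i
+ # b = np.zeros(3)
+ # b[1] = 1.0                           # isolate the f'(x)*h term
+ # w = np.linalg.solve(A, b)
+ # print(w)  # [-0.5  0.   0.5] -> f'(x) ~ (f(x+h) - f(x-h)) / (2*h)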
671
+
672
+ def _jacobian(func, x, *, atol=None, rtol=None, maxiter=10,
673
+ order=8, initial_step=0.5, step_factor=2.0):
674
+ r"""Evaluate the Jacobian of a function numerically.
675
+
676
+ Parameters
677
+ ----------
678
+ func : callable
679
+ The function whose Jacobian is desired. The signature must be::
680
+
681
+ func(x: ndarray) -> ndarray
682
+
683
+ where each element of ``x`` is a finite real. If the function to be
684
+ differentiated accepts additional arguments, wrap it (e.g. using
685
+ `functools.partial` or ``lambda``) and pass the wrapped callable
686
+ into `_jacobian`. See Notes regarding vectorization and the dimensionality
687
+ of the input and output.
688
+ x : array_like
689
+ Points at which to evaluate the Jacobian. Must have at least one dimension.
690
+ See Notes regarding the dimensionality and vectorization.
691
+ atol, rtol : float, optional
692
+ Absolute and relative tolerances for the stopping condition: iteration
693
+ will stop for each element of the Jacobian when
694
+ ``res.error < atol + rtol * abs(res.df)``. The default `atol` is the
695
+ smallest normal number of the appropriate dtype, and the default `rtol`
696
+ is the square root of the precision of the appropriate dtype.
697
+ order : int, default: 8
698
+ The (positive integer) order of the finite difference formula to be
699
+ used. Odd integers will be rounded up to the next even integer.
700
+ initial_step : float, default: 0.5
701
+ The (absolute) initial step size for the finite difference derivative
702
+ approximation.
703
+ step_factor : float, default: 2.0
704
+ The factor by which the step size is *reduced* in each iteration; i.e.
705
+ the step size in iteration 1 is ``initial_step/step_factor``. If
706
+ ``step_factor < 1``, subsequent steps will be greater than the initial
707
+ step; this may be useful if steps smaller than some threshold are
708
+ undesirable (e.g. due to subtractive cancellation error).
709
+ maxiter : int, default: 10
710
+ The maximum number of iterations of the algorithm to perform.
711
+
712
+ Returns
713
+ -------
714
+ res : _RichResult
715
+ An instance of `scipy._lib._util._RichResult` with the following
716
+ attributes.
717
+
718
+ success : bool array
719
+ ``True`` when the algorithm terminated successfully (status ``0``).
720
+ status : int array
721
+ An integer representing the exit status of the algorithm.
722
+ ``0`` : The algorithm converged to the specified tolerances.
723
+ ``-1`` : The error estimate increased, so iteration was terminated.
724
+ ``-2`` : The maximum number of iterations was reached.
725
+ ``-3`` : A non-finite value was encountered.
726
+ ``-4`` : Iteration was terminated by `callback`.
727
+ ``1`` : The algorithm is proceeding normally (in `callback` only).
728
+ df : float array
729
+ The Jacobian of `func` at `x`, if the algorithm terminated
730
+ successfully.
731
+ error : float array
732
+ An estimate of the error: the magnitude of the difference between
733
+ the current estimate of the derivative and the estimate in the
734
+ previous iteration.
735
+ nit : int array
736
+ The number of iterations performed.
737
+ nfev : int array
738
+ The number of points at which `func` was evaluated.
739
+ x : float array
740
+ The value at which the derivative of `func` was evaluated.
741
+
742
+ See Also
743
+ --------
744
+ _differentiate
745
+
746
+ Notes
747
+ -----
748
+ Suppose we wish to evaluate the Jacobian of a function
749
+ :math:`f: \mathbf{R^m} \rightarrow \mathbf{R^n}`, and assign to variables
750
+ ``m`` and ``n`` the positive integer values of :math:`m` and :math:`n`,
751
+ respectively. If we wish to evaluate the Jacobian at a single point,
752
+ then:
753
+
754
+ - argument `x` must be an array of shape ``(m,)``
755
+ - argument `func` must be vectorized to accept an array of shape ``(m, p)``.
756
+ The first axis represents the :math:`m` inputs of :math:`f`; the second
757
+ is for evaluating the function at multiple points in a single call.
758
+ - argument `func` must return an array of shape ``(n, p)``. The first
759
+ axis represents the :math:`n` outputs of :math:`f`; the second
760
+ is for the result of evaluating the function at multiple points.
761
+ - attribute ``df`` of the result object will be an array of shape ``(n, m)``,
762
+ the Jacobian.
763
+
764
+ This function is also vectorized in the sense that the Jacobian can be
765
+ evaluated at ``k`` points in a single call. In this case, `x` would be an
766
+ array of shape ``(m, k)``, `func` would accept an array of shape
767
+ ``(m, k, p)`` and return an array of shape ``(n, k, p)``, and the ``df``
768
+ attribute of the result would have shape ``(n, m, k)``.
769
+
770
+ References
771
+ ----------
772
+ .. [1] Jacobian matrix and determinant, *Wikipedia*,
773
+ https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant
774
+
775
+ Examples
776
+ --------
777
+ The Rosenbrock function maps from :math:`\mathbf{R}^m \rightarrow \mathbf{R}`;
778
+ the SciPy implementation `scipy.optimize.rosen` is vectorized to accept an
779
+ array of shape ``(m, p)`` and return an array of shape ``(p,)``. Suppose we wish
780
+ to evaluate the Jacobian (AKA the gradient because the function returns a scalar)
781
+ at ``[0.5, 0.5, 0.5]``.
782
+
783
+ >>> import numpy as np
784
+ >>> from scipy.optimize._differentiate import _jacobian as jacobian
785
+ >>> from scipy.optimize import rosen, rosen_der
786
+ >>> m = 3
787
+ >>> x = np.full(m, 0.5)
788
+ >>> res = jacobian(rosen, x)
789
+ >>> ref = rosen_der(x) # reference value of the gradient
790
+ >>> res.df, ref
791
+ (array([-51., -1., 50.]), array([-51., -1., 50.]))
792
+
793
+ As an example of a function with multiple outputs, consider Example 4
794
+ from [1]_.
795
+
796
+ >>> def f(x):
797
+ ... x1, x2, x3 = x
798
+ ... return [x1, 5*x3, 4*x2**2 - 2*x3, x3*np.sin(x1)]
799
+
800
+ The true Jacobian is given by:
801
+
802
+ >>> def df(x):
803
+ ... x1, x2, x3 = x
804
+ ... one = np.ones_like(x1)
805
+ ... return [[one, 0*one, 0*one],
806
+ ... [0*one, 0*one, 5*one],
807
+ ... [0*one, 8*x2, -2*one],
808
+ ... [x3*np.cos(x1), 0*one, np.sin(x1)]]
809
+
810
+ Evaluate the Jacobian at an arbitrary point.
811
+
812
+ >>> rng = np.random.default_rng(389252938452)
813
+ >>> x = rng.random(size=3)
814
+ >>> res = jacobian(f, x)
815
+ >>> ref = df(x)
816
+ >>> res.df.shape == (4, 3)
817
+ True
818
+ >>> np.allclose(res.df, ref)
819
+ True
820
+
821
+ Evaluate the Jacobian at 10 arbitrary points in a single call.
822
+
823
+ >>> x = rng.random(size=(3, 10))
824
+ >>> res = jacobian(f, x)
825
+ >>> ref = df(x)
826
+ >>> res.df.shape == (4, 3, 10)
827
+ True
828
+ >>> np.allclose(res.df, ref)
829
+ True
830
+
831
+ """
832
+ x = np.asarray(x)
833
+ int_dtype = np.issubdtype(x.dtype, np.integer)
834
+ x0 = np.asarray(x, dtype=float) if int_dtype else x
835
+
836
+ if x0.ndim < 1:
837
+ message = "Argument `x` must be at least 1-D."
838
+ raise ValueError(message)
839
+
840
+ m = x0.shape[0]
841
+ i = np.arange(m)
842
+
843
+ def wrapped(x):
844
+ p = () if x.ndim == x0.ndim else (x.shape[-1],) # number of abscissae
845
+ new_dims = (1,) if x.ndim == x0.ndim else (1, -1)
846
+ new_shape = (m, m) + x0.shape[1:] + p
847
+ xph = np.expand_dims(x0, new_dims)
848
+ xph = np.broadcast_to(xph, new_shape).copy()
849
+ xph[i, i] = x
850
+ return func(xph)
851
+
852
+ res = _differentiate(wrapped, x, atol=atol, rtol=rtol,
853
+ maxiter=maxiter, order=order, initial_step=initial_step,
854
+ step_factor=step_factor, preserve_shape=True)
855
+ del res.x # the user knows `x`, and the way it gets broadcasted is meaningless here
856
+ return res
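+ # A standalone sketch (illustration only) of the coordinate-perturbation
+ # trick in `wrapped` above: broadcasting `x0` to an (m, m) grid and writing
+ # the abscissae onto the diagonal means column j varies only in coordinate j.
+ # import numpy as np
+ # x0 = np.array([1.0, 2.0, 3.0])
+ # m = len(x0)
+ # xph = np.broadcast_to(x0[:, np.newaxis], (m, m)).copy()
+ # i = np.arange(m)
+ # xph[i, i] = x0 + 0.1     # step each coordinate in turn
+ # print(xph)               # column j equals x0 with only coordinate j perturbed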
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct.cpython-310-x86_64-linux-gnu.so ADDED
Binary file (43.5 kB). View file
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_direct_py.py ADDED
@@ -0,0 +1,278 @@
1
+ from __future__ import annotations
2
+ from typing import ( # noqa: UP035
3
+ Any, Callable, Iterable, TYPE_CHECKING
4
+ )
5
+
6
+ import numpy as np
7
+ from scipy.optimize import OptimizeResult
8
+ from ._constraints import old_bound_to_new, Bounds
9
+ from ._direct import direct as _direct # type: ignore
10
+
11
+ if TYPE_CHECKING:
12
+ import numpy.typing as npt
13
+
14
+ __all__ = ['direct']
15
+
16
+ ERROR_MESSAGES = (
17
+ "Number of function evaluations done is larger than maxfun={}",
18
+ "Number of iterations is larger than maxiter={}",
19
+ "u[i] < l[i] for some i",
20
+ "maxfun is too large",
21
+ "Initialization failed",
22
+ "There was an error in the creation of the sample points",
23
+ "An error occurred while the function was sampled",
24
+ "Maximum number of levels has been reached.",
25
+ "Forced stop",
26
+ "Invalid arguments",
27
+ "Out of memory",
28
+ )
29
+
30
+ SUCCESS_MESSAGES = (
31
+ ("The best function value found is within a relative error={} "
32
+ "of the (known) global optimum f_min"),
33
+ ("The volume of the hyperrectangle containing the lowest function value "
34
+ "found is below vol_tol={}"),
35
+ ("The side length measure of the hyperrectangle containing the lowest "
36
+ "function value found is below len_tol={}"),
37
+ )
38
+
39
+
40
+ def direct(
41
+ func: Callable[[npt.ArrayLike, tuple[Any]], float],
42
+ bounds: Iterable | Bounds,
43
+ *,
44
+ args: tuple = (),
45
+ eps: float = 1e-4,
46
+ maxfun: int | None = None,
47
+ maxiter: int = 1000,
48
+ locally_biased: bool = True,
49
+ f_min: float = -np.inf,
50
+ f_min_rtol: float = 1e-4,
51
+ vol_tol: float = 1e-16,
52
+ len_tol: float = 1e-6,
53
+ callback: Callable[[npt.ArrayLike], None] | None = None
54
+ ) -> OptimizeResult:
55
+ """
56
+ Finds the global minimum of a function using the
57
+ DIRECT algorithm.
58
+
59
+ Parameters
60
+ ----------
61
+ func : callable
62
+ The objective function to be minimized.
63
+ ``func(x, *args) -> float``
64
+ where ``x`` is an 1-D array with shape (n,) and ``args`` is a tuple of
65
+ the fixed parameters needed to completely specify the function.
66
+ bounds : sequence or `Bounds`
67
+ Bounds for variables. There are two ways to specify the bounds:
68
+
69
+ 1. Instance of `Bounds` class.
70
+ 2. ``(min, max)`` pairs for each element in ``x``.
71
+
72
+ args : tuple, optional
73
+ Any additional fixed parameters needed to
74
+ completely specify the objective function.
75
+ eps : float, optional
76
+ Minimal required difference of the objective function values
77
+ between the current best hyperrectangle and the next potentially
78
+ optimal hyperrectangle to be divided. In consequence, `eps` serves as a
79
+ tradeoff between local and global search: the smaller, the more local
80
+ the search becomes. Default is 1e-4.
81
+ maxfun : int or None, optional
82
+ Approximate upper bound on objective function evaluations.
83
+ If `None`, will be automatically set to ``1000 * N`` where ``N``
84
+ represents the number of dimensions. Will be capped if necessary to
85
+ limit DIRECT's RAM usage to approximately 1 GiB. This will only occur for very
86
+ high dimensional problems and excessive `maxfun`. Default is `None`.
87
+ maxiter : int, optional
88
+ Maximum number of iterations. Default is 1000.
89
+ locally_biased : bool, optional
90
+ If `True` (default), use the locally biased variant of the
91
+ algorithm known as DIRECT_L. If `False`, use the original unbiased
92
+ DIRECT algorithm. For hard problems with many local minima,
93
+ `False` is recommended.
94
+ f_min : float, optional
95
+ Function value of the global optimum. Set this value only if the
96
+ global optimum is known. Default is ``-np.inf``, so that this
97
+ termination criterion is deactivated.
98
+ f_min_rtol : float, optional
99
+ Terminate the optimization once the relative error between the
100
+ current best minimum `f` and the supplied global minimum `f_min`
101
+ is smaller than `f_min_rtol`. This parameter is only used if
102
+ `f_min` is also set. Must lie between 0 and 1. Default is 1e-4.
103
+ vol_tol : float, optional
104
+ Terminate the optimization once the volume of the hyperrectangle
105
+ containing the lowest function value is smaller than `vol_tol`
106
+ of the complete search space. Must lie between 0 and 1.
107
+ Default is 1e-16.
108
+ len_tol : float, optional
109
+ If `locally_biased=True`, terminate the optimization once half of
110
+ the normalized maximal side length of the hyperrectangle containing
111
+ the lowest function value is smaller than `len_tol`.
112
+ If `locally_biased=False`, terminate the optimization once half of
113
+ the normalized diagonal of the hyperrectangle containing the lowest
114
+ function value is smaller than `len_tol`. Must lie between 0 and 1.
115
+ Default is 1e-6.
116
+ callback : callable, optional
117
+ A callback function with signature ``callback(xk)`` where ``xk``
118
+ represents the best function value found so far.
119
+
120
+ Returns
121
+ -------
122
+ res : OptimizeResult
123
+ The optimization result represented as a ``OptimizeResult`` object.
124
+ Important attributes are: ``x`` the solution array, ``success`` a
125
+ Boolean flag indicating if the optimizer exited successfully and
126
+ ``message`` which describes the cause of the termination. See
127
+ `OptimizeResult` for a description of other attributes.
128
+
129
+ Notes
130
+ -----
131
+ DIviding RECTangles (DIRECT) is a deterministic global
132
+ optimization algorithm capable of minimizing a black box function with
133
+ its variables subject to lower and upper bound constraints by sampling
134
+ potential solutions in the search space [1]_. The algorithm starts by
135
+ normalising the search space to an n-dimensional unit hypercube.
136
+ It samples the function at the center of this hypercube and at 2n
137
+ (n is the number of variables) more points, 2 in each coordinate
138
+ direction. Using these function values, DIRECT then divides the
139
+ domain into hyperrectangles, each having exactly one of the sampling
140
+ points as its center. In each iteration, DIRECT chooses, using the `eps`
141
+ parameter which defaults to 1e-4, some of the existing hyperrectangles
142
+ to be further divided. This division process continues until either the
143
+ maximum number of iterations or maximum function evaluations allowed
144
+ are exceeded, or the hyperrectangle containing the minimal value found
145
+ so far becomes small enough. If `f_min` is specified, the optimization
146
+ will stop once this function value is reached within a relative tolerance.
147
+ The locally biased variant of DIRECT (originally called DIRECT_L) [2]_ is
148
+ used by default. It makes the search more locally biased and more
149
+ efficient for cases with only a few local minima.
150
+
151
+ A note about termination criteria: `vol_tol` refers to the volume of the
152
+ hyperrectangle containing the lowest function value found so far. This
153
+ volume decreases exponentially with increasing dimensionality of the
154
+ problem. Therefore `vol_tol` should be decreased to avoid premature
155
+ termination of the algorithm for higher dimensions. This does not hold
156
+ for `len_tol`: it refers either to half of the maximal side length
157
+ (for ``locally_biased=True``) or half of the diagonal of the
158
+ hyperrectangle (for ``locally_biased=False``).
159
+
160
+ This code is based on the DIRECT 2.0.4 Fortran code by Gablonsky et al. at
161
+ https://ctk.math.ncsu.edu/SOFTWARE/DIRECTv204.tar.gz .
162
+ This original version was initially converted via f2c and then cleaned up
163
+ and reorganized by Steven G. Johnson, August 2007, for the NLopt project.
164
+ The `direct` function wraps the C implementation.
165
+
166
+ .. versionadded:: 1.9.0
167
+
168
+ References
169
+ ----------
170
+ .. [1] Jones, D.R., Perttunen, C.D. & Stuckman, B.E. Lipschitzian
171
+ optimization without the Lipschitz constant. J Optim Theory Appl
172
+ 79, 157-181 (1993).
173
+ .. [2] Gablonsky, J., Kelley, C. A Locally-Biased form of the DIRECT
174
+ Algorithm. Journal of Global Optimization 21, 27-37 (2001).
175
+
176
+ Examples
177
+ --------
178
+ The following example is a 2-D problem with four local minima: minimizing
179
+ the Styblinski-Tang function
180
+ (https://en.wikipedia.org/wiki/Test_functions_for_optimization).
181
+
182
+ >>> from scipy.optimize import direct, Bounds
183
+ >>> def styblinski_tang(pos):
184
+ ... x, y = pos
185
+ ... return 0.5 * (x**4 - 16*x**2 + 5*x + y**4 - 16*y**2 + 5*y)
186
+ >>> bounds = Bounds([-4., -4.], [4., 4.])
187
+ >>> result = direct(styblinski_tang, bounds)
188
+ >>> result.x, result.fun, result.nfev
189
+ array([-2.90321597, -2.90321597]), -78.3323279095383, 2011
190
+
191
+ The correct global minimum was found but with a huge number of function
192
+ evaluations (2011). Loosening the termination tolerances `vol_tol` and
193
+ `len_tol` can be used to stop DIRECT earlier.
194
+
195
+ >>> result = direct(styblinski_tang, bounds, len_tol=1e-3)
196
+ >>> result.x, result.fun, result.nfev
197
+ array([-2.9044353, -2.9044353]), -78.33230330754142, 207
198
+
199
+ """
200
+ # convert bounds to new Bounds class if necessary
201
+ if not isinstance(bounds, Bounds):
202
+ if isinstance(bounds, list) or isinstance(bounds, tuple):
203
+ lb, ub = old_bound_to_new(bounds)
204
+ bounds = Bounds(lb, ub)
205
+ else:
206
+ message = ("bounds must be a sequence or "
207
+ "instance of Bounds class")
208
+ raise ValueError(message)
209
+
210
+ lb = np.ascontiguousarray(bounds.lb, dtype=np.float64)
211
+ ub = np.ascontiguousarray(bounds.ub, dtype=np.float64)
212
+
213
+ # validate bounds
214
+ # check that lower bounds are smaller than upper bounds
215
+ if not np.all(lb < ub):
216
+ raise ValueError('Bounds are not consistent min < max')
217
+ # check for infs
218
+ if (np.any(np.isinf(lb)) or np.any(np.isinf(ub))):
219
+ raise ValueError("Bounds must not be inf.")
220
+
221
+ # validate tolerances
222
+ if (vol_tol < 0 or vol_tol > 1):
223
+ raise ValueError("vol_tol must be between 0 and 1.")
224
+ if (len_tol < 0 or len_tol > 1):
225
+ raise ValueError("len_tol must be between 0 and 1.")
226
+ if (f_min_rtol < 0 or f_min_rtol > 1):
227
+ raise ValueError("f_min_rtol must be between 0 and 1.")
228
+
229
+ # validate maxfun and maxiter
230
+ if maxfun is None:
231
+ maxfun = 1000 * lb.shape[0]
232
+ if not isinstance(maxfun, int):
233
+ raise ValueError("maxfun must be of type int.")
234
+ if maxfun < 0:
235
+ raise ValueError("maxfun must be > 0.")
236
+ if not isinstance(maxiter, int):
237
+ raise ValueError("maxiter must be of type int.")
238
+ if maxiter < 0:
239
+ raise ValueError("maxiter must be > 0.")
240
+
241
+ # validate boolean parameters
242
+ if not isinstance(locally_biased, bool):
243
+ raise ValueError("locally_biased must be True or False.")
244
+
245
+ def _func_wrap(x, args=None):
246
+ x = np.asarray(x)
247
+ if args is None:
248
+ f = func(x)
249
+ else:
250
+ f = func(x, *args)
251
+ # always return a float
252
+ return np.asarray(f).item()
253
+
254
+ # TODO: fix disp argument
255
+ x, fun, ret_code, nfev, nit = _direct(
256
+ _func_wrap,
257
+ np.asarray(lb), np.asarray(ub),
258
+ args,
259
+ False, eps, maxfun, maxiter,
260
+ locally_biased,
261
+ f_min, f_min_rtol,
262
+ vol_tol, len_tol, callback
263
+ )
264
+
265
+ format_val = (maxfun, maxiter, f_min_rtol, vol_tol, len_tol)
266
+ if ret_code > 2:
267
+ message = SUCCESS_MESSAGES[ret_code - 3].format(
268
+ format_val[ret_code - 1])
269
+ elif 0 < ret_code <= 2:
270
+ message = ERROR_MESSAGES[ret_code - 1].format(format_val[ret_code - 1])
271
+ elif 0 > ret_code > -100:
272
+ message = ERROR_MESSAGES[abs(ret_code) + 1]
273
+ else:
274
+ message = ERROR_MESSAGES[ret_code + 99]
275
+
276
+ return OptimizeResult(x=np.asarray(x), fun=fun, status=ret_code,
277
+ success=ret_code > 2, message=message,
278
+ nfev=nfev, nit=nit)
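A minimal usage sketch of the options validated above (argument values are illustrative only; ``sphere`` is a hypothetical test function, not part of the module):
>>> import numpy as np
>>> from scipy.optimize import direct, Bounds
>>> def sphere(x):
...     return float(np.sum(x**2))
>>> bounds = Bounds([-2., -2.], [2., 2.])
>>> # not-locally-biased DIRECT with an explicit volume tolerance
>>> res = direct(sphere, bounds, locally_biased=False, vol_tol=1e-10)
>>> res.status, res.message  # doctest: +SKIP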
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_dual_annealing.py ADDED
@@ -0,0 +1,732 @@
1
+ # Dual Annealing implementation.
2
+ # Copyright (c) 2018 Sylvain Gubian <[email protected]>,
3
+ # Yang Xiang <[email protected]>
4
+ # Author: Sylvain Gubian, Yang Xiang, PMP S.A.
5
+
6
+ """
7
+ A Dual Annealing global optimization algorithm
8
+ """
9
+
10
+ import numpy as np
11
+ from scipy.optimize import OptimizeResult
12
+ from scipy.optimize import minimize, Bounds
13
+ from scipy.special import gammaln
14
+ from scipy._lib._util import check_random_state
15
+ from scipy.optimize._constraints import new_bounds_to_old
16
+
17
+ __all__ = ['dual_annealing']
18
+
19
+
20
+ class VisitingDistribution:
21
+ """
22
+ Class used to generate new coordinates based on the distorted
23
+ Cauchy-Lorentz distribution. Depending on the steps within the strategy
24
+ chain, the class implements the strategy for generating new location
25
+ changes.
26
+
27
+ Parameters
28
+ ----------
29
+ lb : array_like
30
+ A 1-D NumPy ndarray containing lower bounds of the generated
31
+ components. Neither NaN nor inf is allowed.
32
+ ub : array_like
33
+ A 1-D NumPy ndarray containing upper bounds for the generated
34
+ components. Neither NaN nor inf is allowed.
35
+ visiting_param : float
36
+ Parameter for visiting distribution. Default value is 2.62.
37
+ Higher values give the visiting distribution a heavier tail; this
38
+ makes the algorithm jump to more distant regions.
39
+ The value range is (1, 3]. Its value is fixed for the life of the
40
+ object.
41
+ rand_gen : {`~numpy.random.RandomState`, `~numpy.random.Generator`}
42
+ A `~numpy.random.RandomState`, `~numpy.random.Generator` object
43
+ for using the current state of the created random generator container.
44
+
45
+ """
46
+ TAIL_LIMIT = 1.e8
47
+ MIN_VISIT_BOUND = 1.e-10
48
+
49
+ def __init__(self, lb, ub, visiting_param, rand_gen):
50
+ # if you wish to make _visiting_param adjustable during the life of
51
+ # the object then _factor2, _factor3, _factor5, _d1, _factor6 will
52
+ # have to be dynamically calculated in `visit_fn`. They're factored
53
+ # out here so they don't need to be recalculated all the time.
54
+ self._visiting_param = visiting_param
55
+ self.rand_gen = rand_gen
56
+ self.lower = lb
57
+ self.upper = ub
58
+ self.bound_range = ub - lb
59
+
60
+ # these are invariant numbers unless visiting_param changes
61
+ self._factor2 = np.exp((4.0 - self._visiting_param) * np.log(
62
+ self._visiting_param - 1.0))
63
+ self._factor3 = np.exp((2.0 - self._visiting_param) * np.log(2.0)
64
+ / (self._visiting_param - 1.0))
65
+ self._factor4_p = np.sqrt(np.pi) * self._factor2 / (self._factor3 * (
66
+ 3.0 - self._visiting_param))
67
+
68
+ self._factor5 = 1.0 / (self._visiting_param - 1.0) - 0.5
69
+ self._d1 = 2.0 - self._factor5
70
+ self._factor6 = np.pi * (1.0 - self._factor5) / np.sin(
71
+ np.pi * (1.0 - self._factor5)) / np.exp(gammaln(self._d1))
72
+
73
+ def visiting(self, x, step, temperature):
74
+ """ Based on the step in the strategy chain, new coordinates are
75
+ generated by changing all components is the same time or only
76
+ one of them, the new values are computed with visit_fn method
77
+ """
78
+ dim = x.size
79
+ if step < dim:
80
+ # Changing all coordinates with a new visiting value
81
+ visits = self.visit_fn(temperature, dim)
82
+ upper_sample, lower_sample = self.rand_gen.uniform(size=2)
83
+ visits[visits > self.TAIL_LIMIT] = self.TAIL_LIMIT * upper_sample
84
+ visits[visits < -self.TAIL_LIMIT] = -self.TAIL_LIMIT * lower_sample
85
+ x_visit = visits + x
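+ # The double fmod below wraps x_visit back into [lower, upper),
+ # i.e. bound violations are handled periodically (wrap-around).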
86
+ a = x_visit - self.lower
87
+ b = np.fmod(a, self.bound_range) + self.bound_range
88
+ x_visit = np.fmod(b, self.bound_range) + self.lower
89
+ x_visit[np.fabs(
90
+ x_visit - self.lower) < self.MIN_VISIT_BOUND] += 1.e-10
91
+ else:
92
+ # Changing only one coordinate at a time based on strategy
93
+ # chain step
94
+ x_visit = np.copy(x)
95
+ visit = self.visit_fn(temperature, 1)[0]
96
+ if visit > self.TAIL_LIMIT:
97
+ visit = self.TAIL_LIMIT * self.rand_gen.uniform()
98
+ elif visit < -self.TAIL_LIMIT:
99
+ visit = -self.TAIL_LIMIT * self.rand_gen.uniform()
100
+ index = step - dim
101
+ x_visit[index] = visit + x[index]
102
+ a = x_visit[index] - self.lower[index]
103
+ b = np.fmod(a, self.bound_range[index]) + self.bound_range[index]
104
+ x_visit[index] = np.fmod(b, self.bound_range[
105
+ index]) + self.lower[index]
106
+ if np.fabs(x_visit[index] - self.lower[
107
+ index]) < self.MIN_VISIT_BOUND:
108
+ x_visit[index] += self.MIN_VISIT_BOUND
109
+ return x_visit
110
+
111
+ def visit_fn(self, temperature, dim):
112
+ """ Formula Visita from p. 405 of reference [2] """
113
+ x, y = self.rand_gen.normal(size=(dim, 2)).T
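+ # A rescaled normal sample x divided by |y| ** ((q_v - 1) / (3 - q_v))
+ # below yields the heavy-tailed distorted Cauchy-Lorentz deviate.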
114
+
115
+ factor1 = np.exp(np.log(temperature) / (self._visiting_param - 1.0))
116
+ factor4 = self._factor4_p * factor1
117
+
118
+ # sigmax
119
+ x *= np.exp(-(self._visiting_param - 1.0) * np.log(
120
+ self._factor6 / factor4) / (3.0 - self._visiting_param))
121
+
122
+ den = np.exp((self._visiting_param - 1.0) * np.log(np.fabs(y)) /
123
+ (3.0 - self._visiting_param))
124
+
125
+ return x / den
126
+
127
+
128
+ class EnergyState:
129
+ """
130
+ Class used to record the energy state. At any time, it knows the
131
+ currently used coordinates and the most recent best location.
132
+
133
+ Parameters
134
+ ----------
135
+ lower : array_like
136
+ A 1-D NumPy ndarray containing lower bounds for generating the initial
137
+ random components in the `reset` method.
138
+ upper : array_like
139
+ A 1-D NumPy ndarray containing upper bounds for generating the initial
140
+ random components in the `reset` method.
141
+ Neither NaN nor inf is allowed.
142
+ callback : callable, ``callback(x, f, context)``, optional
143
+ A callback function which will be called for all minima found.
144
+ ``x`` and ``f`` are the coordinates and function value of the
145
+ latest minimum found, and `context` has value in [0, 1, 2]
146
+ """
147
+ # Maximum number of trials for generating a valid starting point
148
+ MAX_REINIT_COUNT = 1000
149
+
150
+ def __init__(self, lower, upper, callback=None):
151
+ self.ebest = None
152
+ self.current_energy = None
153
+ self.current_location = None
154
+ self.xbest = None
155
+ self.lower = lower
156
+ self.upper = upper
157
+ self.callback = callback
158
+
159
+ def reset(self, func_wrapper, rand_gen, x0=None):
160
+ """
161
+ Initialize the current location in the search domain. If `x0` is not
162
+ provided, a random location within the bounds is generated.
163
+ """
164
+ if x0 is None:
165
+ self.current_location = rand_gen.uniform(self.lower, self.upper,
166
+ size=len(self.lower))
167
+ else:
168
+ self.current_location = np.copy(x0)
169
+ init_error = True
170
+ reinit_counter = 0
171
+ while init_error:
172
+ self.current_energy = func_wrapper.fun(self.current_location)
173
+ if self.current_energy is None:
174
+ raise ValueError('Objective function is returning None')
175
+ if (not np.isfinite(self.current_energy) or np.isnan(
176
+ self.current_energy)):
177
+ if reinit_counter >= EnergyState.MAX_REINIT_COUNT:
178
+ init_error = False
179
+ message = (
180
+ 'Stopping algorithm because the function '
181
+ 'creates NaN or (+/-) infinity values even when '
182
+ 'trying new random parameters'
183
+ )
184
+ raise ValueError(message)
185
+ self.current_location = rand_gen.uniform(self.lower,
186
+ self.upper,
187
+ size=self.lower.size)
188
+ reinit_counter += 1
189
+ else:
190
+ init_error = False
191
+ # If first time reset, initialize ebest and xbest
192
+ if self.ebest is None and self.xbest is None:
193
+ self.ebest = self.current_energy
194
+ self.xbest = np.copy(self.current_location)
195
+ # Otherwise, we keep them in case of reannealing reset
196
+
197
+ def update_best(self, e, x, context):
198
+ self.ebest = e
199
+ self.xbest = np.copy(x)
200
+ if self.callback is not None:
201
+ val = self.callback(x, e, context)
202
+ if val is not None:
203
+ if val:
204
+ return ('Callback function requested to stop early by '
205
+ 'returning True')
206
+
207
+ def update_current(self, e, x):
208
+ self.current_energy = e
209
+ self.current_location = np.copy(x)
210
+
211
+
212
+ class StrategyChain:
213
+ """
214
+ Class that implements within a Markov chain the strategy for location
215
+ acceptance and local search decision making.
216
+
217
+ Parameters
218
+ ----------
219
+ acceptance_param : float
220
+ Parameter for acceptance distribution. It is used to control the
221
+ probability of acceptance. The lower the acceptance parameter, the
222
+ smaller the probability of acceptance. Default value is -5.0 with
223
+ a range (-1e4, -5].
224
+ visit_dist : VisitingDistribution
225
+ Instance of `VisitingDistribution` class.
226
+ func_wrapper : ObjectiveFunWrapper
227
+ Instance of `ObjectiveFunWrapper` class.
228
+ minimizer_wrapper: LocalSearchWrapper
229
+ Instance of `LocalSearchWrapper` class.
230
+ rand_gen : {None, int, `numpy.random.Generator`,
231
+ `numpy.random.RandomState`}, optional
232
+
233
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
234
+ singleton is used.
235
+ If `seed` is an int, a new ``RandomState`` instance is used,
236
+ seeded with `seed`.
237
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
238
+ that instance is used.
239
+ energy_state: EnergyState
240
+ Instance of `EnergyState` class.
241
+
242
+ """
243
+
244
+ def __init__(self, acceptance_param, visit_dist, func_wrapper,
245
+ minimizer_wrapper, rand_gen, energy_state):
246
+ # Local strategy chain minimum energy and location
247
+ self.emin = energy_state.current_energy
248
+ self.xmin = np.array(energy_state.current_location)
249
+ # Global optimizer state
250
+ self.energy_state = energy_state
251
+ # Acceptance parameter
252
+ self.acceptance_param = acceptance_param
253
+ # Visiting distribution instance
254
+ self.visit_dist = visit_dist
255
+ # Wrapper to objective function
256
+ self.func_wrapper = func_wrapper
257
+ # Wrapper to the local minimizer
258
+ self.minimizer_wrapper = minimizer_wrapper
259
+ self.not_improved_idx = 0
260
+ self.not_improved_max_idx = 1000
261
+ self._rand_gen = rand_gen
262
+ self.temperature_step = 0
263
+ self.K = 100 * len(energy_state.current_location)
264
+
265
+ def accept_reject(self, j, e, x_visit):
266
+ r = self._rand_gen.uniform()
267
+ pqv_temp = 1.0 - ((1.0 - self.acceptance_param) *
268
+ (e - self.energy_state.current_energy) / self.temperature_step)
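+ # pqv_temp = 1 - (1 - q_a) * beta * dE, with beta = 1 / temperature_step;
+ # exponentiation by 1 / (1 - q_a) below yields the acceptance
+ # probability p_qa from the module-level acceptance formula.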
269
+ if pqv_temp <= 0.:
270
+ pqv = 0.
271
+ else:
272
+ pqv = np.exp(np.log(pqv_temp) / (
273
+ 1. - self.acceptance_param))
274
+
275
+ if r <= pqv:
276
+ # We accept the new location and update state
277
+ self.energy_state.update_current(e, x_visit)
278
+ self.xmin = np.copy(self.energy_state.current_location)
279
+
280
+ # No improvement for a long time
281
+ if self.not_improved_idx >= self.not_improved_max_idx:
282
+ if j == 0 or self.energy_state.current_energy < self.emin:
283
+ self.emin = self.energy_state.current_energy
284
+ self.xmin = np.copy(self.energy_state.current_location)
285
+
286
+ def run(self, step, temperature):
287
+ self.temperature_step = temperature / float(step + 1)
288
+ self.not_improved_idx += 1
289
+ for j in range(self.energy_state.current_location.size * 2):
290
+ if j == 0:
291
+ if step == 0:
292
+ self.energy_state_improved = True
293
+ else:
294
+ self.energy_state_improved = False
295
+ x_visit = self.visit_dist.visiting(
296
+ self.energy_state.current_location, j, temperature)
297
+ # Calling the objective function
298
+ e = self.func_wrapper.fun(x_visit)
299
+ if e < self.energy_state.current_energy:
300
+ # We have got a better energy value
301
+ self.energy_state.update_current(e, x_visit)
302
+ if e < self.energy_state.ebest:
303
+ val = self.energy_state.update_best(e, x_visit, 0)
304
+ if val is not None:
305
+ if val:
306
+ return val
307
+ self.energy_state_improved = True
308
+ self.not_improved_idx = 0
309
+ else:
310
+ # We have not improved but do we accept the new location?
311
+ self.accept_reject(j, e, x_visit)
312
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
313
+ return ('Maximum number of function calls reached '
314
+ 'during annealing')
315
+ # End of StrategyChain loop
316
+
317
+ def local_search(self):
318
+ # Decision making for performing a local search
319
+ # based on strategy chain results.
320
+ # If the energy has improved, or there has been no improvement for
321
+ # too long, perform a local search from the best strategy chain location.
322
+ if self.energy_state_improved:
323
+ # Global energy has improved, let's see if LS improves further
324
+ e, x = self.minimizer_wrapper.local_search(self.energy_state.xbest,
325
+ self.energy_state.ebest)
326
+ if e < self.energy_state.ebest:
327
+ self.not_improved_idx = 0
328
+ val = self.energy_state.update_best(e, x, 1)
329
+ if val is not None:
330
+ if val:
331
+ return val
332
+ self.energy_state.update_current(e, x)
333
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
334
+ return ('Maximum number of function calls reached '
335
+ 'during local search')
336
+ # Check probability of a need to perform a LS even if no improvement
337
+ do_ls = False
338
+ if self.K < 90 * len(self.energy_state.current_location):
339
+ pls = np.exp(self.K * (
340
+ self.energy_state.ebest - self.energy_state.current_energy) /
341
+ self.temperature_step)
342
+ if pls >= self._rand_gen.uniform():
343
+ do_ls = True
344
+ # Global energy not improved, let's see what LS gives
345
+ # on the best strategy chain location
346
+ if self.not_improved_idx >= self.not_improved_max_idx:
347
+ do_ls = True
348
+ if do_ls:
349
+ e, x = self.minimizer_wrapper.local_search(self.xmin, self.emin)
350
+ self.xmin = np.copy(x)
351
+ self.emin = e
352
+ self.not_improved_idx = 0
353
+ self.not_improved_max_idx = self.energy_state.current_location.size
354
+ if e < self.energy_state.ebest:
355
+ val = self.energy_state.update_best(
356
+ self.emin, self.xmin, 2)
357
+ if val is not None:
358
+ if val:
359
+ return val
360
+ self.energy_state.update_current(e, x)
361
+ if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
362
+ return ('Maximum number of function calls reached '
363
+ 'during dual annealing')
364
+
365
+
366
+ class ObjectiveFunWrapper:
367
+
368
+ def __init__(self, func, maxfun=1e7, *args):
369
+ self.func = func
370
+ self.args = args
371
+ # Number of objective function evaluations
372
+ self.nfev = 0
373
+ # Number of gradient function evaluations if used
374
+ self.ngev = 0
375
+ # Number of Hessian evaluations of the objective function if used
376
+ self.nhev = 0
377
+ self.maxfun = maxfun
378
+
379
+ def fun(self, x):
380
+ self.nfev += 1
381
+ return self.func(x, *self.args)
382
+
383
+
384
+ class LocalSearchWrapper:
385
+ """
386
+ Class used to wrap around the minimizer used for local search.
387
+ The default local minimizer is SciPy's L-BFGS-B minimizer.
388
+ """
389
+
390
+ LS_MAXITER_RATIO = 6
391
+ LS_MAXITER_MIN = 100
392
+ LS_MAXITER_MAX = 1000
393
+
394
+ def __init__(self, search_bounds, func_wrapper, *args, **kwargs):
395
+ self.func_wrapper = func_wrapper
396
+ self.kwargs = kwargs
397
+ self.jac = self.kwargs.get('jac', None)
398
+ self.hess = self.kwargs.get('hess', None)
399
+ self.hessp = self.kwargs.get('hessp', None)
400
+ self.kwargs.pop("args", None)
401
+ self.minimizer = minimize
402
+ bounds_list = list(zip(*search_bounds))
403
+ self.lower = np.array(bounds_list[0])
404
+ self.upper = np.array(bounds_list[1])
405
+
406
+ # If no minimizer specified, use SciPy minimize with 'L-BFGS-B' method
407
+ if not self.kwargs:
408
+ n = len(self.lower)
409
+ ls_max_iter = min(max(n * self.LS_MAXITER_RATIO,
410
+ self.LS_MAXITER_MIN),
411
+ self.LS_MAXITER_MAX)
412
+ self.kwargs['method'] = 'L-BFGS-B'
413
+ self.kwargs['options'] = {
414
+ 'maxiter': ls_max_iter,
415
+ }
416
+ self.kwargs['bounds'] = list(zip(self.lower, self.upper))
417
+ else:
418
+ if callable(self.jac):
419
+ def wrapped_jac(x):
420
+ return self.jac(x, *args)
421
+ self.kwargs['jac'] = wrapped_jac
422
+ if callable(self.hess):
423
+ def wrapped_hess(x):
424
+ return self.hess(x, *args)
425
+ self.kwargs['hess'] = wrapped_hess
426
+ if callable(self.hessp):
427
+ def wrapped_hessp(x, p):
428
+ return self.hessp(x, p, *args)
429
+ self.kwargs['hessp'] = wrapped_hessp
430
+
431
+ def local_search(self, x, e):
432
+ # Run local search from the given x location where energy value is e
433
+ x_tmp = np.copy(x)
434
+ mres = self.minimizer(self.func_wrapper.fun, x, **self.kwargs)
435
+ if 'njev' in mres:
436
+ self.func_wrapper.ngev += mres.njev
437
+ if 'nhev' in mres:
438
+ self.func_wrapper.nhev += mres.nhev
439
+ # Check if the result is a valid value
440
+ is_finite = np.all(np.isfinite(mres.x)) and np.isfinite(mres.fun)
441
+ in_bounds = np.all(mres.x >= self.lower) and np.all(
442
+ mres.x <= self.upper)
443
+ is_valid = is_finite and in_bounds
444
+
445
+ # Use the new point only if it is valid and returns a better result
446
+ if is_valid and mres.fun < e:
447
+ return mres.fun, mres.x
448
+ else:
449
+ return e, x_tmp
450
+
451
+
452
+ def dual_annealing(func, bounds, args=(), maxiter=1000,
453
+ minimizer_kwargs=None, initial_temp=5230.,
454
+ restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
455
+ maxfun=1e7, seed=None, no_local_search=False,
456
+ callback=None, x0=None):
457
+ """
458
+ Find the global minimum of a function using Dual Annealing.
459
+
460
+ Parameters
461
+ ----------
462
+ func : callable
463
+ The objective function to be minimized. Must be in the form
464
+ ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
465
+ and ``args`` is a tuple of any additional fixed parameters needed to
466
+ completely specify the function.
467
+ bounds : sequence or `Bounds`
468
+ Bounds for variables. There are two ways to specify the bounds:
469
+
470
+ 1. Instance of `Bounds` class.
471
+ 2. Sequence of ``(min, max)`` pairs for each element in `x`.
472
+
473
+ args : tuple, optional
474
+ Any additional fixed parameters needed to completely specify the
475
+ objective function.
476
+ maxiter : int, optional
477
+ The maximum number of global search iterations. Default value is 1000.
478
+ minimizer_kwargs : dict, optional
479
+ Keyword arguments to be passed to the local minimizer
480
+ (`minimize`). An important option could be ``method`` for the minimizer
481
+ method to use.
482
+ If no keyword arguments are provided, the local minimizer defaults to
483
+ 'L-BFGS-B' and uses the already supplied bounds. If `minimizer_kwargs`
484
+ is specified, then the dict must contain all parameters required to
485
+ control the local minimization. `args` is ignored in this dict, as it is
486
+ passed automatically. `bounds` is not automatically passed on to the
487
+ local minimizer as the method may not support them.
488
+ initial_temp : float, optional
489
+ The initial temperature; use higher values to facilitate a wider
490
+ search of the energy landscape, allowing dual_annealing to escape
491
+ local minima that it is trapped in. Default value is 5230. Range is
492
+ (0.01, 5.e4].
493
+ restart_temp_ratio : float, optional
494
+ During the annealing process, the temperature decreases; when it
495
+ reaches ``initial_temp * restart_temp_ratio``, the reannealing process
496
+ is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
497
+ visit : float, optional
498
+ Parameter for visiting distribution. Default value is 2.62. Higher
499
+ values give the visiting distribution a heavier tail; this makes
500
+ the algorithm jump to more distant regions. The value range is (1, 3].
501
+ accept : float, optional
502
+ Parameter for acceptance distribution. It is used to control the
503
+ probability of acceptance. The lower the acceptance parameter, the
504
+ smaller the probability of acceptance. Default value is -5.0 with
505
+ a range (-1e4, -5].
506
+ maxfun : int, optional
507
+ Soft limit for the number of objective function calls. If the
508
+ algorithm is in the middle of a local search, this number will be
509
+ exceeded, and the algorithm will stop just after the local search is
510
+ done. Default value is 1e7.
511
+ seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
512
+ If `seed` is None (or `np.random`), the `numpy.random.RandomState`
513
+ singleton is used.
514
+ If `seed` is an int, a new ``RandomState`` instance is used,
515
+ seeded with `seed`.
516
+ If `seed` is already a ``Generator`` or ``RandomState`` instance then
517
+ that instance is used.
518
+ Specify `seed` for repeatable minimizations. The random numbers
519
+ generated with this seed only affect the visiting distribution function
520
+ and new coordinates generation.
521
+ no_local_search : bool, optional
522
+ If `no_local_search` is set to True, a traditional Generalized
523
+ Simulated Annealing will be performed with no local search
524
+ strategy applied.
525
+ callback : callable, optional
526
+ A callback function with signature ``callback(x, f, context)``,
527
+ which will be called for all minima found.
528
+ ``x`` and ``f`` are the coordinates and function value of the
529
+ latest minimum found, and ``context`` has value in [0, 1, 2], with the
530
+ following meaning:
531
+
532
+ - 0: minimum detected in the annealing process.
533
+ - 1: detection occurred in the local search process.
534
+ - 2: detection done in the dual annealing process.
535
+
536
+ If the callback implementation returns True, the algorithm will stop.
537
+ x0 : ndarray, shape(n,), optional
538
+ Coordinates of a single N-D starting point.
539
+
540
+ Returns
541
+ -------
542
+ res : OptimizeResult
543
+ The optimization result represented as a `OptimizeResult` object.
544
+ Important attributes are: ``x`` the solution array, ``fun`` the value
545
+ of the function at the solution, and ``message`` which describes the
546
+ cause of the termination.
547
+ See `OptimizeResult` for a description of other attributes.
548
+
549
+ Notes
550
+ -----
551
+ This function implements the Dual Annealing optimization. This stochastic
552
+ approach derived from [3]_ combines the generalization of CSA (Classical
553
+ Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
554
+ to a strategy for applying a local search on accepted locations [4]_.
555
+ An alternative implementation of this same algorithm is described in [5]_
556
+ and benchmarks are presented in [6]_. This approach introduces an advanced
557
+ method to refine the solution found by the generalized annealing
558
+ process. This algorithm uses a distorted Cauchy-Lorentz visiting
559
+ distribution, with its shape controlled by the parameter :math:`q_{v}`
560
+
561
+ .. math::
562
+
563
+ g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
564
+ \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
565
+ \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
566
+ \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
567
+ \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}
568
+
569
+ Where :math:`t` is the artificial time. This visiting distribution is used
570
+ to generate a trial jump distance :math:`\\Delta x(t)` of variable
571
+ :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.
572
+
573
+ From the starting point, after calling the visiting distribution
574
+ function, the acceptance probability is computed as follows:
575
+
576
+ .. math::
577
+
578
+ p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
579
+ \\frac{1}{1-q_{a}}}\\}}
580
+
581
+ Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
582
+ acceptance probability is assigned to the cases where
583
+
584
+ .. math::
585
+
586
+ [1-(1-q_{a}) \\beta \\Delta E] < 0
587
+
588
+ The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to
589
+
590
+ .. math::
591
+
592
+ T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
593
+ 1 + t\\right)^{q_{v}-1}-1}
594
+
595
+ Where :math:`q_{v}` is the visiting parameter.
596
+
597
+ .. versionadded:: 1.2.0
598
+
599
+ References
600
+ ----------
601
+ .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
602
+ statistics. Journal of Statistical Physics, 52, 479-487 (1998).
603
+ .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
604
+ Physica A, 233, 395-406 (1996).
605
+ .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
606
+ Annealing Algorithm and Its Application to the Thomson Model.
607
+ Physics Letters A, 233, 216-220 (1997).
608
+ .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
609
+ Annealing. Physical Review E, 62, 4473 (2000).
610
+ .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
611
+ Simulated Annealing for Efficient Global Optimization: the GenSA
612
+ Package for R. The R Journal, Volume 5/1 (2013).
613
+ .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
614
+ Statistical Software, 60(6), 1 - 45, (2014).
615
+ :doi:`10.18637/jss.v060.i06`
616
+
617
+ Examples
618
+ --------
619
+ The following example is a 10-D problem, with many local minima.
620
+ The function involved is called Rastrigin
621
+ (https://en.wikipedia.org/wiki/Rastrigin_function)
622
+
623
+ >>> import numpy as np
624
+ >>> from scipy.optimize import dual_annealing
625
+ >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
626
+ >>> lw = [-5.12] * 10
627
+ >>> up = [5.12] * 10
628
+ >>> ret = dual_annealing(func, bounds=list(zip(lw, up)))
629
+ >>> ret.x
630
+ array([-4.26437714e-09, -3.91699361e-09, -1.86149218e-09, -3.97165720e-09,
631
+ -6.29151648e-09, -6.53145322e-09, -3.93616815e-09, -6.55623025e-09,
632
+ -6.05775280e-09, -5.00668935e-09]) # random
633
+ >>> ret.fun
634
+ 0.000000
635
+
636
+ """
637
+
638
+ if isinstance(bounds, Bounds):
639
+ bounds = new_bounds_to_old(bounds.lb, bounds.ub, len(bounds.lb))
640
+
641
+ if x0 is not None and len(x0) != len(bounds):
642
+ raise ValueError('Bounds size does not match x0')
643
+
644
+ lu = list(zip(*bounds))
645
+ lower = np.array(lu[0])
646
+ upper = np.array(lu[1])
647
+ # Check that restart temperature ratio is correct
648
+ if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
649
+ raise ValueError('Restart temperature ratio has to be in range (0, 1)')
650
+ # Checking bounds are valid
651
+ if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
652
+ np.isnan(lower)) or np.any(np.isnan(upper))):
653
+ raise ValueError('Some bounds values are inf values or nan values')
654
+ # Checking that bounds are consistent
655
+ if not np.all(lower < upper):
656
+ raise ValueError('Bounds are not consistent min < max')
657
+ # Checking that bounds are the same length
658
+ if len(lower) != len(upper):
659
+ raise ValueError('Bounds do not have the same dimensions')
660
+
661
+ # Wrapper for the objective function
662
+ func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
663
+
664
+ # minimizer_kwargs has to be a dict, not None
665
+ minimizer_kwargs = minimizer_kwargs or {}
666
+
667
+ minimizer_wrapper = LocalSearchWrapper(
668
+ bounds, func_wrapper, *args, **minimizer_kwargs)
669
+
670
+ # Initialization of random Generator for reproducible runs if seed provided
671
+ rand_state = check_random_state(seed)
672
+ # Initialization of the energy state
673
+ energy_state = EnergyState(lower, upper, callback)
674
+ energy_state.reset(func_wrapper, rand_state, x0)
675
+ # Minimum value of annealing temperature reached to perform
676
+ # re-annealing
677
+ temperature_restart = initial_temp * restart_temp_ratio
678
+ # VisitingDistribution instance
679
+ visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
680
+ # Strategy chain instance
681
+ strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
682
+ minimizer_wrapper, rand_state, energy_state)
683
+ need_to_stop = False
684
+ iteration = 0
685
+ message = []
686
+ # OptimizeResult object to be returned
687
+ optimize_res = OptimizeResult()
688
+ optimize_res.success = True
689
+ optimize_res.status = 0
690
+
691
+ t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
692
+ # Run the search loop
693
+ while not need_to_stop:
694
+ for i in range(maxiter):
695
+ # Compute temperature for this step
696
+ s = float(i) + 2.0
697
+ t2 = np.exp((visit - 1) * np.log(s)) - 1.0
698
+ temperature = initial_temp * t1 / t2
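+ # Matches the docstring schedule
+ # T(t) = T(1) * (2**(qv - 1) - 1) / ((1 + t)**(qv - 1) - 1)
+ # with qv = visit and t = i + 1 (so s = 1 + t).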
699
+ if iteration >= maxiter:
700
+ message.append("Maximum number of iteration reached")
701
+ need_to_stop = True
702
+ break
703
+ # Need a re-annealing process?
704
+ if temperature < temperature_restart:
705
+ energy_state.reset(func_wrapper, rand_state)
706
+ break
707
+ # starting strategy chain
708
+ val = strategy_chain.run(i, temperature)
709
+ if val is not None:
710
+ message.append(val)
711
+ need_to_stop = True
712
+ optimize_res.success = False
713
+ break
714
+ # Possible local search at the end of the strategy chain
715
+ if not no_local_search:
716
+ val = strategy_chain.local_search()
717
+ if val is not None:
718
+ message.append(val)
719
+ need_to_stop = True
720
+ optimize_res.success = False
721
+ break
722
+ iteration += 1
723
+
724
+ # Setting the OptimizeResult values
725
+ optimize_res.x = energy_state.xbest
726
+ optimize_res.fun = energy_state.ebest
727
+ optimize_res.nit = iteration
728
+ optimize_res.nfev = func_wrapper.nfev
729
+ optimize_res.njev = func_wrapper.ngev
730
+ optimize_res.nhev = func_wrapper.nhev
731
+ optimize_res.message = message
732
+ return optimize_res
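A short sketch (illustrative argument values) of reproducible runs via the documented ``seed`` argument; the seeded generator drives the visiting distribution and new coordinate generation, and the default local minimizer (L-BFGS-B) is deterministic:
>>> import numpy as np
>>> from scipy.optimize import dual_annealing
>>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
>>> bounds = list(zip([-5.12] * 4, [5.12] * 4))
>>> a = dual_annealing(func, bounds, seed=1234, maxiter=100)
>>> b = dual_annealing(func, bounds, seed=1234, maxiter=100)
>>> np.allclose(a.x, b.x)
True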
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_group_columns.cpython-310-x86_64-linux-gnu.so ADDED
Binary file (99.8 kB).
 
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_hessian_update_strategy.py ADDED
@@ -0,0 +1,475 @@
1
+ """Hessian update strategies for quasi-Newton optimization methods."""
2
+ import numpy as np
3
+ from numpy.linalg import norm
4
+ from scipy.linalg import get_blas_funcs, issymmetric
5
+ from warnings import warn
6
+
7
+
8
+ __all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
9
+
10
+
11
+ class HessianUpdateStrategy:
12
+ """Interface for implementing Hessian update strategies.
13
+
14
+ Many optimization methods make use of Hessian (or inverse Hessian)
15
+ approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
16
+ Some of these approximations, however, do not actually need to store
17
+ the entire matrix or can compute the internal matrix product with a
18
+ given vector in a very efficiently manner. This class serves as an
19
+ abstract interface between the optimization algorithm and the
20
+ quasi-Newton update strategies, giving freedom of implementation
21
+ to store and update the internal matrix as efficiently as possible.
22
+ Different choices of initialization and update procedure will result
23
+ in different quasi-Newton strategies.
24
+
25
+ Four methods should be implemented in derived classes: ``initialize``,
26
+ ``update``, ``dot`` and ``get_matrix``.
27
+
28
+ Notes
29
+ -----
30
+ Any instance of a class that implements this interface,
31
+ can be accepted by the method ``minimize`` and used by
32
+ the compatible solvers to approximate the Hessian (or
33
+ inverse Hessian) used by the optimization algorithms.
34
+ """
35
+
36
+ def initialize(self, n, approx_type):
37
+ """Initialize internal matrix.
38
+
39
+ Allocate internal memory for storing and updating
40
+ the Hessian or its inverse.
41
+
42
+ Parameters
43
+ ----------
44
+ n : int
45
+ Problem dimension.
46
+ approx_type : {'hess', 'inv_hess'}
47
+ Selects either the Hessian or the inverse Hessian.
48
+ When set to 'hess' the Hessian will be stored and updated.
49
+ When set to 'inv_hess' its inverse will be used instead.
50
+ """
51
+ raise NotImplementedError("The method ``initialize(n, approx_type)``"
52
+ " is not implemented.")
53
+
54
+ def update(self, delta_x, delta_grad):
55
+ """Update internal matrix.
56
+
57
+ Update Hessian matrix or its inverse (depending on how 'approx_type'
58
+ is defined) using information about the last evaluated points.
59
+
60
+ Parameters
61
+ ----------
62
+ delta_x : ndarray
63
+ The difference between two points the gradient
64
+ function have been evaluated at: ``delta_x = x2 - x1``.
65
+ delta_grad : ndarray
66
+ The difference between the gradients:
67
+ ``delta_grad = grad(x2) - grad(x1)``.
68
+ """
69
+ raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
70
+ " is not implemented.")
71
+
72
+ def dot(self, p):
73
+ """Compute the product of the internal matrix with the given vector.
74
+
75
+ Parameters
76
+ ----------
77
+ p : array_like
78
+ 1-D array representing a vector.
79
+
80
+ Returns
81
+ -------
82
+ Hp : array
83
+ 1-D represents the result of multiplying the approximation matrix
84
+ by vector p.
85
+ """
86
+ raise NotImplementedError("The method ``dot(p)``"
87
+ " is not implemented.")
88
+
89
+ def get_matrix(self):
90
+ """Return current internal matrix.
91
+
92
+ Returns
93
+ -------
94
+ H : ndarray, shape (n, n)
95
+ Dense matrix containing either the Hessian
96
+ or its inverse (depending on how 'approx_type'
97
+ is defined).
98
+ """
99
+ raise NotImplementedError("The method ``get_matrix(p)``"
100
+ " is not implemented.")
101
+
102
+
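To make the four-method contract above concrete, here is a minimal sketch (not part of scipy; ``ScaledIdentity`` is a hypothetical example) of a strategy that keeps a fixed scaled-identity approximation:
import numpy as np
from scipy.optimize import HessianUpdateStrategy

class ScaledIdentity(HessianUpdateStrategy):
    """Hypothetical fixed ``scale * I`` approximation (never updated)."""
    def __init__(self, scale=1.0):
        self.scale = scale
    def initialize(self, n, approx_type):
        # Store the problem size; the matrix itself stays implicit.
        self.n = n
        self.approx_type = approx_type
    def update(self, delta_x, delta_grad):
        pass  # deliberately keep the fixed scaling
    def dot(self, p):
        return self.scale * np.asarray(p)
    def get_matrix(self):
        return self.scale * np.eye(self.n)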
103
+ class FullHessianUpdateStrategy(HessianUpdateStrategy):
104
+ """Hessian update strategy with full dimensional internal representation.
105
+ """
106
+ _syr = get_blas_funcs('syr', dtype='d') # Symmetric rank 1 update
107
+ _syr2 = get_blas_funcs('syr2', dtype='d') # Symmetric rank 2 update
108
+ # Symmetric matrix-vector product
109
+ _symv = get_blas_funcs('symv', dtype='d')
110
+
111
+ def __init__(self, init_scale='auto'):
112
+ self.init_scale = init_scale
113
+ # Until initialize is called we can't really use the class,
114
+ # so it makes sense to set everything to None.
115
+ self.first_iteration = None
116
+ self.approx_type = None
117
+ self.B = None
118
+ self.H = None
119
+
120
+ def initialize(self, n, approx_type):
121
+ """Initialize internal matrix.
122
+
123
+ Allocate internal memory for storing and updating
124
+ the Hessian or its inverse.
125
+
126
+ Parameters
127
+ ----------
128
+ n : int
129
+ Problem dimension.
130
+ approx_type : {'hess', 'inv_hess'}
131
+ Selects either the Hessian or the inverse Hessian.
132
+ When set to 'hess' the Hessian will be stored and updated.
133
+ When set to 'inv_hess' its inverse will be used instead.
134
+ """
135
+ self.first_iteration = True
136
+ self.n = n
137
+ self.approx_type = approx_type
138
+ if approx_type not in ('hess', 'inv_hess'):
139
+ raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
140
+ # Create matrix
141
+ if self.approx_type == 'hess':
142
+ self.B = np.eye(n, dtype=float)
143
+ else:
144
+ self.H = np.eye(n, dtype=float)
145
+
146
+ def _auto_scale(self, delta_x, delta_grad):
147
+ # Heuristic to scale matrix at first iteration.
148
+ # Described in Nocedal and Wright "Numerical Optimization"
149
+ # p.143 formula (6.20).
150
+ s_norm2 = np.dot(delta_x, delta_x)
151
+ y_norm2 = np.dot(delta_grad, delta_grad)
152
+ ys = np.abs(np.dot(delta_grad, delta_x))
153
+ if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
154
+ return 1
155
+ if self.approx_type == 'hess':
156
+ return y_norm2 / ys
157
+ else:
158
+ return ys / y_norm2
159
+
160
+ def _update_implementation(self, delta_x, delta_grad):
161
+ raise NotImplementedError("The method ``_update_implementation``"
162
+ " is not implemented.")
163
+
164
+ def update(self, delta_x, delta_grad):
165
+ """Update internal matrix.
166
+
167
+ Update Hessian matrix or its inverse (depending on how 'approx_type'
168
+ is defined) using information about the last evaluated points.
169
+
170
+ Parameters
171
+ ----------
172
+ delta_x : ndarray
173
+ The difference between two points the gradient
174
+ function have been evaluated at: ``delta_x = x2 - x1``.
175
+ delta_grad : ndarray
176
+ The difference between the gradients:
177
+ ``delta_grad = grad(x2) - grad(x1)``.
178
+ """
179
+ if np.all(delta_x == 0.0):
180
+ return
181
+ if np.all(delta_grad == 0.0):
182
+ warn('delta_grad == 0.0. Check if the approximated '
183
+ 'function is linear. If the function is linear '
184
+ 'better results can be obtained by defining the '
185
+ 'Hessian as zero instead of using quasi-Newton '
186
+ 'approximations.',
187
+ UserWarning, stacklevel=2)
188
+ return
189
+ if self.first_iteration:
190
+ # Get user specific scale
191
+ if isinstance(self.init_scale, str) and self.init_scale == "auto":
192
+ scale = self._auto_scale(delta_x, delta_grad)
193
+ else:
194
+ scale = self.init_scale
195
+
196
+ # Check for complex: numpy will silently cast a complex array to
197
+ # a real one but not so for scalar as it raises a TypeError.
198
+ # Checking here brings a consistent behavior.
199
+ replace = False
200
+ if np.size(scale) == 1:
201
+ # to account for the legacy behavior having the exact same cast
202
+ scale = float(scale)
203
+ elif np.iscomplexobj(scale):
204
+ raise TypeError("init_scale contains complex elements, "
205
+ "must be real.")
206
+ else: # test explicitly for allowed shapes and values
207
+ replace = True
208
+ if self.approx_type == 'hess':
209
+ shape = np.shape(self.B)
210
+ dtype = self.B.dtype
211
+ else:
212
+ shape = np.shape(self.H)
213
+ dtype = self.H.dtype
214
+ # copy, will replace the original
215
+ scale = np.array(scale, dtype=dtype, copy=True)
216
+
217
+ # it has to match the shape of the matrix for the multiplication,
218
+ # no implicit broadcasting is allowed
219
+ if shape != (init_shape := np.shape(scale)):
220
+ raise ValueError("If init_scale is an array, it must have the "
221
+ f"dimensions of the hess/inv_hess: {shape}."
222
+ f" Got {init_shape}.")
223
+ if not issymmetric(scale):
224
+ raise ValueError("If init_scale is an array, it must be"
225
+ " symmetric (passing scipy.linalg.issymmetric)"
226
+ " to be an approximation of a hess/inv_hess.")
227
+
228
+ # Scale initial matrix with ``scale * np.eye(n)`` or replace
229
+ # This is not ideal, we could assign the scale directly in
230
+ # initialize, but we would need to
231
+ if self.approx_type == 'hess':
232
+ if replace:
233
+ self.B = scale
234
+ else:
235
+ self.B *= scale
236
+ else:
237
+ if replace:
238
+ self.H = scale
239
+ else:
240
+ self.H *= scale
241
+ self.first_iteration = False
242
+ self._update_implementation(delta_x, delta_grad)
243
+
244
+ def dot(self, p):
245
+ """Compute the product of the internal matrix with the given vector.
246
+
247
+ Parameters
248
+ ----------
249
+ p : array_like
250
+ 1-D array representing a vector.
251
+
252
+ Returns
253
+ -------
254
+ Hp : array
255
+ 1-D represents the result of multiplying the approximation matrix
256
+ by vector p.
257
+ """
258
+ if self.approx_type == 'hess':
259
+ return self._symv(1, self.B, p)
260
+ else:
261
+ return self._symv(1, self.H, p)
262
+
263
+ def get_matrix(self):
264
+ """Return the current internal matrix.
265
+
266
+ Returns
267
+ -------
268
+ M : ndarray, shape (n, n)
269
+ Dense matrix containing either the Hessian or its inverse
270
+ (depending on how `approx_type` was defined).
271
+ """
272
+ if self.approx_type == 'hess':
273
+ M = np.copy(self.B)
274
+ else:
275
+ M = np.copy(self.H)
276
+ li = np.tril_indices_from(M, k=-1)
277
+ M[li] = M.T[li]
278
+ return M
279
+
280
+
281
+ class BFGS(FullHessianUpdateStrategy):
282
+ """Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
283
+
284
+ Parameters
285
+ ----------
286
+ exception_strategy : {'skip_update', 'damp_update'}, optional
287
+ Define how to proceed when the curvature condition is violated.
288
+ Set it to 'skip_update' to just skip the update. Or, alternatively,
289
+ set it to 'damp_update' to interpolate between the actual BFGS
290
+ result and the unmodified matrix. Both exceptions strategies
291
+ are explained in [1]_, p.536-537.
292
+ min_curvature : float
293
+ This number, scaled by a normalization factor, defines the
294
+ minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
295
+ unaffected by the exception strategy. By default is equal to
296
+ 1e-8 when ``exception_strategy = 'skip_update'`` and equal
297
+ to 0.2 when ``exception_strategy = 'damp_update'``.
298
+ init_scale : {float, np.array, 'auto'}
299
+ This parameter can be used to initialize the Hessian or its
300
+ inverse. When a float is given, the relevant array is initialized
301
+ to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
302
+ Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
303
+ this array will be used. Otherwise an error is generated.
304
+ Set it to 'auto' in order to use an automatic heuristic for choosing
305
+ the initial scale. The heuristic is described in [1]_, p.143.
306
+ The default is 'auto'.
307
+
308
+ Notes
309
+ -----
310
+ The update is based on the description in [1]_, p.140.
311
+
312
+ References
313
+ ----------
314
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
315
+ Second Edition (2006).
316
+ """
317
+
318
+ def __init__(self, exception_strategy='skip_update', min_curvature=None,
319
+ init_scale='auto'):
320
+ if exception_strategy == 'skip_update':
321
+ if min_curvature is not None:
322
+ self.min_curvature = min_curvature
323
+ else:
324
+ self.min_curvature = 1e-8
325
+ elif exception_strategy == 'damp_update':
326
+ if min_curvature is not None:
327
+ self.min_curvature = min_curvature
328
+ else:
329
+ self.min_curvature = 0.2
330
+ else:
331
+ raise ValueError("`exception_strategy` must be 'skip_update' "
332
+ "or 'damp_update'.")
333
+
334
+ super().__init__(init_scale)
335
+ self.exception_strategy = exception_strategy
336
+
337
+ def _update_inverse_hessian(self, ys, Hy, yHy, s):
338
+ """Update the inverse Hessian matrix.
339
+
340
+ BFGS update using the formula:
341
+
342
+ ``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
343
+ - 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``
344
+
345
+ where ``s = delta_x`` and ``y = delta_grad``. This formula is
346
+ equivalent to (6.17) in [1]_ written in a more efficient way
347
+ for implementation.
348
+
349
+ References
350
+ ----------
351
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
352
+ Second Edition (2006).
353
+ """
354
+ self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
355
+ self.H = self._syr((ys + yHy) / ys ** 2, s, a=self.H)
356
+
357
+ def _update_hessian(self, ys, Bs, sBs, y):
358
+ """Update the Hessian matrix.
359
+
360
+ BFGS update using the formula:
361
+
362
+ ``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``
363
+
364
+ where ``s`` is short for ``delta_x`` and ``y`` is short
365
+ for ``delta_grad``. Formula (6.19) in [1]_.
366
+
367
+ References
368
+ ----------
369
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
370
+ Second Edition (2006).
371
+ """
372
+ self.B = self._syr(1.0 / ys, y, a=self.B)
373
+ self.B = self._syr(-1.0 / sBs, Bs, a=self.B)
374
+
375
+ def _update_implementation(self, delta_x, delta_grad):
376
+ # Auxiliary variables w and z
377
+ if self.approx_type == 'hess':
378
+ w = delta_x
379
+ z = delta_grad
380
+ else:
381
+ w = delta_grad
382
+ z = delta_x
383
+ # Do some common operations
384
+ wz = np.dot(w, z)
385
+ Mw = self.dot(w)
386
+ wMw = Mw.dot(w)
387
+ # Guarantee that wMw > 0 by reinitializing matrix.
388
+ # While this is always true in exact arithmetic,
389
+ # indefinite matrix may appear due to roundoff errors.
390
+ if wMw <= 0.0:
391
+ scale = self._auto_scale(delta_x, delta_grad)
392
+ # Reinitialize matrix
393
+ if self.approx_type == 'hess':
394
+ self.B = scale * np.eye(self.n, dtype=float)
395
+ else:
396
+ self.H = scale * np.eye(self.n, dtype=float)
397
+ # Do common operations for new matrix
398
+ Mw = self.dot(w)
399
+ wMw = Mw.dot(w)
400
+ # Check if curvature condition is violated
401
+ if wz <= self.min_curvature * wMw:
402
+ # If the option 'skip_update' is set
403
+ # we just skip the update when the condition
404
+ # is violated.
405
+ if self.exception_strategy == 'skip_update':
406
+ return
407
+ # If the option 'damp_update' is set we
408
+ # interpolate between the actual BFGS
409
+ # result and the unmodified matrix.
410
+ elif self.exception_strategy == 'damp_update':
411
+ update_factor = (1-self.min_curvature) / (1 - wz/wMw)
412
+ z = update_factor*z + (1-update_factor)*Mw
413
+ wz = np.dot(w, z)
414
+ # Update matrix
415
+ if self.approx_type == 'hess':
416
+ self._update_hessian(wz, Mw, wMw, z)
417
+ else:
418
+ self._update_inverse_hessian(wz, Mw, wMw, z)
419
+
420
+
421
+ class SR1(FullHessianUpdateStrategy):
422
+ """Symmetric-rank-1 Hessian update strategy.
423
+
424
+ Parameters
425
+ ----------
426
+ min_denominator : float
427
+ This number, scaled by a normalization factor,
428
+ defines the minimum denominator magnitude allowed
429
+ in the update. When the condition is violated we skip
430
+ the update. By default uses ``1e-8``.
431
+ init_scale : {float, np.array, 'auto'}, optional
432
+ This parameter can be used to initialize the Hessian or its
433
+ inverse. When a float is given, the relevant array is initialized
434
+ to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
435
+ Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
436
+ this array will be used. Otherwise an error is generated.
437
+ Set it to 'auto' in order to use an automatic heuristic for choosing
438
+ the initial scale. The heuristic is described in [1]_, p.143.
439
+ The default is 'auto'.
440
+
441
+ Notes
442
+ -----
443
+ The update is based on the description in [1]_, p.144-146.
444
+
445
+ References
446
+ ----------
447
+ .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
448
+ Second Edition (2006).
449
+ """
450
+
451
+ def __init__(self, min_denominator=1e-8, init_scale='auto'):
452
+ self.min_denominator = min_denominator
453
+ super().__init__(init_scale)
454
+
455
+ def _update_implementation(self, delta_x, delta_grad):
456
+ # Auxiliary variables w and z
457
+ if self.approx_type == 'hess':
458
+ w = delta_x
459
+ z = delta_grad
460
+ else:
461
+ w = delta_grad
462
+ z = delta_x
463
+ # Do some common operations
464
+ Mw = self.dot(w)
465
+ z_minus_Mw = z - Mw
466
+ denominator = np.dot(w, z_minus_Mw)
467
+ # If the denominator is too small
468
+ # we just skip the update.
469
+ if np.abs(denominator) <= self.min_denominator*norm(w)*norm(z_minus_Mw):
470
+ return
471
+ # Update matrix
472
+ if self.approx_type == 'hess':
473
+ self.B = self._syr(1/denominator, z_minus_Mw, a=self.B)
474
+ else:
475
+ self.H = self._syr(1/denominator, z_minus_Mw, a=self.H)
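A short usage sketch (illustrative starting point): instances of these strategies can be passed as the ``hess`` argument of ``minimize`` with a compatible method such as 'trust-constr':
>>> from scipy.optimize import minimize, rosen, rosen_der, SR1
>>> res = minimize(rosen, x0=[1.3, 0.7], method='trust-constr',
...                jac=rosen_der, hess=SR1())
>>> res.x  # doctest: +SKIP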
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_isotonic.py ADDED
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+
4
+ import numpy as np
5
+
6
+ from ._optimize import OptimizeResult
7
+ from ._pava_pybind import pava
8
+
9
+ if TYPE_CHECKING:
10
+ import numpy.typing as npt
11
+
12
+
13
+ __all__ = ["isotonic_regression"]
14
+
15
+
16
+ def isotonic_regression(
17
+ y: npt.ArrayLike,
18
+ *,
19
+ weights: npt.ArrayLike | None = None,
20
+ increasing: bool = True,
21
+ ) -> OptimizeResult:
22
+ r"""Nonparametric isotonic regression.
23
+
24
+ A (not strictly) monotonically increasing array `x` with the same length
25
+ as `y` is calculated by the pool adjacent violators algorithm (PAVA), see
26
+ [1]_. See the Notes section for more details.
27
+
28
+ Parameters
29
+ ----------
30
+ y : (N,) array_like
31
+ Response variable.
32
+ weights : (N,) array_like or None
33
+ Case weights.
34
+ increasing : bool
35
+ If True, fit monotonic increasing, i.e. isotonic, regression.
36
+ If False, fit a monotonic decreasing, i.e. antitonic, regression.
37
+ Default is True.
38
+
39
+ Returns
40
+ -------
41
+ res : OptimizeResult
42
+ The optimization result represented as a ``OptimizeResult`` object.
43
+ Important attributes are:
44
+
45
+ - ``x``: The isotonic regression solution, i.e. an increasing (or
46
+ decreasing) array of the same length as y, with elements in the
47
+ range from min(y) to max(y).
48
+ - ``weights`` : Array with the sum of case weights for each block
49
+ (or pool) B.
50
+ - ``blocks``: Array of length B+1 with the indices of the start
51
+ positions of each block (or pool) B. The j-th block is given by
52
+ ``x[blocks[j]:blocks[j+1]]`` for which all values are the same.
53
+
54
+ Notes
55
+ -----
56
+ Given data :math:`y` and case weights :math:`w`, the isotonic regression
57
+ solves the following optimization problem:
58
+
59
+ .. math::
60
+
61
+ \operatorname{argmin}_{x_i} \sum_i w_i (y_i - x_i)^2 \quad
62
+ \text{subject to } x_i \leq x_j \text{ whenever } i \leq j \,.
63
+
64
+ For every input value :math:`y_i`, it generates a value :math:`x_i` such
65
+ that :math:`x` is increasing (but not strictly), i.e.
66
+ :math:`x_i \leq x_{i+1}`. This is accomplished by the PAVA.
67
+ The solution consists of pools or blocks, i.e. neighboring elements of
68
+ :math:`x`, e.g. :math:`x_i` and :math:`x_{i+1}`, that all have the same
69
+ value.
70
+
71
+ Most interestingly, the solution stays the same if the squared loss is
72
+ replaced by the wide class of Bregman functions which are the unique
73
+ class of strictly consistent scoring functions for the mean, see [2]_
74
+ and references therein.
75
+
76
+ The implemented version of PAVA according to [1]_ has a computational
77
+ complexity of O(N) with input size N.
78
+
79
+ References
80
+ ----------
81
+ .. [1] Busing, F. M. T. A. (2022).
82
+ Monotone Regression: A Simple and Fast O(n) PAVA Implementation.
83
+ Journal of Statistical Software, Code Snippets, 102(1), 1-25.
84
+ :doi:`10.18637/jss.v102.c01`
85
+ .. [2] Jordan, A.I., Mühlemann, A. & Ziegel, J.F.
86
+ Characterizing the optimal solutions to the isotonic regression
87
+ problem for identifiable functionals.
88
+ Ann Inst Stat Math 74, 489-514 (2022).
89
+ :doi:`10.1007/s10463-021-00808-0`
90
+
91
+ Examples
92
+ --------
93
+ This example demonstrates that ``isotonic_regression`` really solves a
94
+ constrained optimization problem.
95
+
96
+ >>> import numpy as np
97
+ >>> from scipy.optimize import isotonic_regression, minimize
98
+ >>> y = [1.5, 1.0, 4.0, 6.0, 5.7, 5.0, 7.8, 9.0, 7.5, 9.5, 9.0]
99
+ >>> def objective(yhat, y):
100
+ ... return np.sum((yhat - y)**2)
101
+ >>> def constraint(yhat, y):
102
+ ... # This is for a monotonically increasing regression.
103
+ ... return np.diff(yhat)
104
+ >>> result = minimize(objective, x0=y, args=(y,),
105
+ ... constraints=[{'type': 'ineq',
106
+ ... 'fun': lambda x: constraint(x, y)}])
107
+ >>> result.x
108
+ array([1.25 , 1.25 , 4. , 5.56666667, 5.56666667,
109
+ 5.56666667, 7.8 , 8.25 , 8.25 , 9.25 ,
110
+ 9.25 ])
111
+ >>> result = isotonic_regression(y)
112
+ >>> result.x
113
+ array([1.25 , 1.25 , 4. , 5.56666667, 5.56666667,
114
+ 5.56666667, 7.8 , 8.25 , 8.25 , 9.25 ,
115
+ 9.25 ])
116
+
117
+ The big advantage of ``isotonic_regression`` compared to calling
118
+ ``minimize`` is that it is more user-friendly, i.e. one does not need to
119
+ define objective and constraint functions, and that it is orders of
120
+ magnitude faster. On commodity hardware (in 2023), for normally distributed
121
+ input y of length 1000, the minimizer takes about 4 seconds, while
122
+ ``isotonic_regression`` takes about 200 microseconds.
123
+ """
124
+ yarr = np.atleast_1d(y) # Check yarr.ndim == 1 is implicit (pybind11) in pava.
125
+ order = slice(None) if increasing else slice(None, None, -1)
126
+ x = np.array(yarr[order], order="C", dtype=np.float64, copy=True)
127
+ if weights is None:
128
+ wx = np.ones_like(yarr, dtype=np.float64)
129
+ else:
130
+ warr = np.atleast_1d(weights)
131
+
132
+ if not (yarr.ndim == warr.ndim == 1 and yarr.shape[0] == warr.shape[0]):
133
+ raise ValueError(
134
+ "Input arrays y and w must have one dimension of equal length."
135
+ )
136
+ if np.any(warr <= 0):
137
+ raise ValueError("Weights w must be strictly positive.")
138
+
139
+ wx = np.array(warr[order], order="C", dtype=np.float64, copy=True)
140
+ n = x.shape[0]
141
+ r = np.full(shape=n + 1, fill_value=-1, dtype=np.intp)
142
+ x, wx, r, b = pava(x, wx, r)
143
+ # Now that we know the number of blocks b, we only keep the relevant part
144
+ # of r and wx.
145
+ # As information: Due to the pava implementation, after the last block
146
+ # index, there might be smaller numbers appended to r, e.g.
147
+ # r = [0, 10, 8, 7] which in the end should be r = [0, 10].
148
+ r = r[:b + 1]
149
+ wx = wx[:b]
150
+ if not increasing:
151
+ x = x[::-1]
152
+ wx = wx[::-1]
153
+ r = r[-1] - r[::-1]
154
+ return OptimizeResult(
155
+ x=x,
156
+ weights=wx,
157
+ blocks=r,
158
+ )
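A short sketch (illustrative data) of an antitonic fit and of reading the block representation returned above:
>>> import numpy as np
>>> from scipy.optimize import isotonic_regression
>>> res = isotonic_regression([3.0, 1.0, 2.0, 4.0], increasing=False)
>>> # block j spans res.x[res.blocks[j]:res.blocks[j+1]], one pooled value each
>>> np.diff(res.blocks).sum() == res.x.shape[0]
True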
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py ADDED
@@ -0,0 +1,543 @@
+ """
+ Functions
+ ---------
+ .. autosummary::
+    :toctree: generated/
+
+    fmin_l_bfgs_b
+
+ """
+
+ ## License for the Python wrapper
+ ## ==============================
+
+ ## Copyright (c) 2004 David M. Cooke <[email protected]>
+
+ ## Permission is hereby granted, free of charge, to any person obtaining a
+ ## copy of this software and associated documentation files (the "Software"),
+ ## to deal in the Software without restriction, including without limitation
+ ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ ## and/or sell copies of the Software, and to permit persons to whom the
+ ## Software is furnished to do so, subject to the following conditions:
+
+ ## The above copyright notice and this permission notice shall be included in
+ ## all copies or substantial portions of the Software.
+
+ ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ ## DEALINGS IN THE SOFTWARE.
+
+ ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
+
+ import numpy as np
+ from numpy import array, asarray, float64, zeros
+ from . import _lbfgsb
+ from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
+                         _wrap_callback, _check_unknown_options,
+                         _prepare_scalar_function)
+ from ._constraints import old_bound_to_new
+
+ from scipy.sparse.linalg import LinearOperator
+
+ __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
+
+
+ def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
+                   approx_grad=0,
+                   bounds=None, m=10, factr=1e7, pgtol=1e-5,
+                   epsilon=1e-8,
+                   iprint=-1, maxfun=15000, maxiter=15000, disp=None,
+                   callback=None, maxls=20):
+     """
+     Minimize a function func using the L-BFGS-B algorithm.
+
+     Parameters
+     ----------
+     func : callable f(x,*args)
+         Function to minimize.
+     x0 : ndarray
+         Initial guess.
+     fprime : callable fprime(x,*args), optional
+         The gradient of `func`. If None, then `func` returns the function
+         value and the gradient (``f, g = func(x, *args)``), unless
+         `approx_grad` is True in which case `func` returns only ``f``.
+     args : sequence, optional
+         Arguments to pass to `func` and `fprime`.
+     approx_grad : bool, optional
+         Whether to approximate the gradient numerically (in which case
+         `func` returns only the function value).
+     bounds : list, optional
+         ``(min, max)`` pairs for each element in ``x``, defining
+         the bounds on that parameter. Use None or +-inf for one of ``min`` or
+         ``max`` when there is no bound in that direction.
+     m : int, optional
+         The maximum number of variable metric corrections
+         used to define the limited memory matrix. (The limited memory BFGS
+         method does not store the full Hessian but uses this many terms in an
+         approximation to it.)
+     factr : float, optional
+         The iteration stops when
+         ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
+         where ``eps`` is the machine precision, which is automatically
+         generated by the code. Typical values for `factr` are: 1e12 for
+         low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
+         high accuracy. See Notes for relationship to `ftol`, which is exposed
+         (instead of `factr`) by the `scipy.optimize.minimize` interface to
+         L-BFGS-B.
+     pgtol : float, optional
+         The iteration will stop when
+         ``max{|proj g_i | i = 1, ..., n} <= pgtol``
+         where ``proj g_i`` is the i-th component of the projected gradient.
+     epsilon : float, optional
+         Step size used when `approx_grad` is True, for numerically
+         calculating the gradient.
+     iprint : int, optional
+         Controls the frequency of output. ``iprint < 0`` means no output;
+         ``iprint = 0`` print only one line at the last iteration;
+         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
+         ``iprint = 99`` print details of every iteration except n-vectors;
+         ``iprint = 100`` print also the changes of active set and final x;
+         ``iprint > 100`` print details of every iteration including x and g.
+     disp : int, optional
+         If zero, then no output. If a positive number, then this overrides
+         `iprint` (i.e., `iprint` gets the value of `disp`).
+     maxfun : int, optional
+         Maximum number of function evaluations. Note that this function
+         may violate the limit because of evaluating gradients by numerical
+         differentiation.
+     maxiter : int, optional
+         Maximum number of iterations.
+     callback : callable, optional
+         Called after each iteration, as ``callback(xk)``, where ``xk`` is the
+         current parameter vector.
+     maxls : int, optional
+         Maximum number of line search steps (per iteration). Default is 20.
+
+     Returns
+     -------
+     x : array_like
+         Estimated position of the minimum.
+     f : float
+         Value of `func` at the minimum.
+     d : dict
+         Information dictionary.
+
+         * d['warnflag'] is
+
+           - 0 if converged,
+           - 1 if too many function evaluations or too many iterations,
+           - 2 if stopped for another reason, given in d['task']
+
+         * d['grad'] is the gradient at the minimum (should be approximately 0)
+         * d['funcalls'] is the number of function calls made.
+         * d['nit'] is the number of iterations.
+
+     See also
+     --------
+     minimize: Interface to minimization algorithms for multivariate
+         functions. See the 'L-BFGS-B' `method` in particular. Note that the
+         `ftol` option is made available via that interface, while `factr` is
+         provided via this interface, where `factr` is the factor multiplying
+         the default machine floating-point precision to arrive at `ftol`:
+         ``ftol = factr * numpy.finfo(float).eps``.
+
+     Notes
+     -----
+     License of L-BFGS-B (FORTRAN code):
+
+     The version included here (in fortran code) is 3.0
+     (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
+     and Jorge Nocedal <[email protected]>. It carries the following
+     condition for use:
+
+     This software is freely available, but we expect that all publications
+     describing work using this software, or all commercial products using it,
+     quote at least one of the references given below. This software is released
+     under the BSD License.
+
+     References
+     ----------
+     * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
+       Constrained Optimization, (1995), SIAM Journal on Scientific and
+       Statistical Computing, 16, 5, pp. 1190-1208.
+     * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
+       FORTRAN routines for large scale bound constrained optimization (1997),
+       ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
+     * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
+       FORTRAN routines for large scale bound constrained optimization (2011),
+       ACM Transactions on Mathematical Software, 38, 1.
+
+     Examples
+     --------
+     Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we define
+     an objective function ``f(m, b) = (y - y_model)**2``, where `y` describes the
+     observations and `y_model` the prediction of the linear model as
+     ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``, are arbitrarily
+     chosen as ``(0,5)`` and ``(5,10)`` for this example.
+
+     >>> import numpy as np
+     >>> from scipy.optimize import fmin_l_bfgs_b
+     >>> X = np.arange(0, 10, 1)
+     >>> M = 2
+     >>> B = 3
+     >>> Y = M * X + B
+     >>> def func(parameters, *args):
+     ...     x = args[0]
+     ...     y = args[1]
+     ...     m, b = parameters
+     ...     y_model = m*x + b
+     ...     error = sum(np.power((y - y_model), 2))
+     ...     return error
+
+     >>> initial_values = np.array([0.0, 1.0])
+
+     >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+     ...                                    approx_grad=True)
+     >>> x_opt, f_opt
+     (array([1.99999999, 3.00000006]), 1.7746231151323805e-14)  # may vary
+
+     The optimized parameters in ``x_opt`` agree with the ground truth parameters
+     ``m`` and ``b``. Next, let us perform a bound constrained optimization using
+     the `bounds` parameter.
+
+     >>> bounds = [(0, 5), (5, 10)]
+     >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+     ...                                    approx_grad=True, bounds=bounds)
+     >>> x_opt, f_opt
+     (array([1.65990508, 5.31649385]), 15.721334516453945)  # may vary
+     """
+     # handle fprime/approx_grad
+     if approx_grad:
+         fun = func
+         jac = None
+     elif fprime is None:
+         fun = MemoizeJac(func)
+         jac = fun.derivative
+     else:
+         fun = func
+         jac = fprime
+
+     # build options
+     callback = _wrap_callback(callback)
+     opts = {'disp': disp,
+             'iprint': iprint,
+             'maxcor': m,
+             'ftol': factr * np.finfo(float).eps,
+             'gtol': pgtol,
+             'eps': epsilon,
+             'maxfun': maxfun,
+             'maxiter': maxiter,
+             'callback': callback,
+             'maxls': maxls}
+
+     res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
+                            **opts)
+     d = {'grad': res['jac'],
+          'task': res['message'],
+          'funcalls': res['nfev'],
+          'nit': res['nit'],
+          'warnflag': res['status']}
+     f = res['fun']
+     x = res['x']
+
+     return x, f, d
+
+
+ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
+                      disp=None, maxcor=10, ftol=2.2204460492503131e-09,
+                      gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
+                      iprint=-1, callback=None, maxls=20,
+                      finite_diff_rel_step=None, **unknown_options):
+     """
+     Minimize a scalar function of one or more variables using the L-BFGS-B
+     algorithm.
+
+     Options
+     -------
+     disp : None or int
+         If `disp is None` (the default), then the supplied version of `iprint`
+         is used. If `disp is not None`, then it overrides the supplied version
+         of `iprint`, with the behaviour described under `iprint`.
+     maxcor : int
+         The maximum number of variable metric corrections used to
+         define the limited memory matrix. (The limited memory BFGS
+         method does not store the full Hessian but uses this many terms
+         in an approximation to it.)
+     ftol : float
+         The iteration stops when ``(f^k -
+         f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
+     gtol : float
+         The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
+         <= gtol`` where ``proj g_i`` is the i-th component of the
+         projected gradient.
+     eps : float or ndarray
+         If `jac is None` the absolute step size used for numerical
+         approximation of the jacobian via forward differences.
+     maxfun : int
+         Maximum number of function evaluations. Note that this function
+         may violate the limit because of evaluating gradients by numerical
+         differentiation.
+     maxiter : int
+         Maximum number of iterations.
+     iprint : int, optional
+         Controls the frequency of output. ``iprint < 0`` means no output;
+         ``iprint = 0`` print only one line at the last iteration;
+         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
+         ``iprint = 99`` print details of every iteration except n-vectors;
+         ``iprint = 100`` print also the changes of active set and final x;
+         ``iprint > 100`` print details of every iteration including x and g.
+     maxls : int, optional
+         Maximum number of line search steps (per iteration). Default is 20.
+     finite_diff_rel_step : None or array_like, optional
+         If `jac in ['2-point', '3-point', 'cs']` the relative step size to
+         use for numerical approximation of the jacobian. The absolute step
+         size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
+         possibly adjusted to fit into the bounds. For ``method='3-point'``
+         the sign of `h` is ignored. If None (default) then step is selected
+         automatically.
+
+     Notes
+     -----
+     The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
+     but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
+     relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
+     I.e., `factr` multiplies the default machine floating-point precision to
+     arrive at `ftol`.
+
+     """
+     _check_unknown_options(unknown_options)
+     m = maxcor
+     pgtol = gtol
+     factr = ftol / np.finfo(float).eps
+
+     x0 = asarray(x0).ravel()
+     n, = x0.shape
+
+     # historically old-style bounds were/are expected by lbfgsb.
+     # That's still the case but we'll deal with new-style from here on,
+     # it's easier
+     if bounds is None:
+         pass
+     elif len(bounds) != n:
+         raise ValueError('length of x0 != length of bounds')
+     else:
+         bounds = np.array(old_bound_to_new(bounds))
+
+         # check bounds
+         if (bounds[0] > bounds[1]).any():
+             raise ValueError(
+                 "LBFGSB - one of the lower bounds is greater than an upper bound."
+             )
+
+         # initial vector must lie within the bounds. Otherwise ScalarFunction and
+         # approx_derivative will cause problems
+         x0 = np.clip(x0, bounds[0], bounds[1])
+
+     if disp is not None:
+         if disp == 0:
+             iprint = -1
+         else:
+             iprint = disp
+
+     # _prepare_scalar_function can use bounds=None to represent no bounds
+     sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
+                                   bounds=bounds,
+                                   finite_diff_rel_step=finite_diff_rel_step)
+
+     func_and_grad = sf.fun_and_grad
+
+     fortran_int = _lbfgsb.types.intvar.dtype
+
+     nbd = zeros(n, fortran_int)
+     low_bnd = zeros(n, float64)
+     upper_bnd = zeros(n, float64)
+     bounds_map = {(-np.inf, np.inf): 0,
+                   (1, np.inf): 1,
+                   (1, 1): 2,
+                   (-np.inf, 1): 3}
+
+     if bounds is not None:
+         for i in range(0, n):
+             l, u = bounds[0, i], bounds[1, i]
+             if not np.isinf(l):
+                 low_bnd[i] = l
+                 l = 1
+             if not np.isinf(u):
+                 upper_bnd[i] = u
+                 u = 1
+             nbd[i] = bounds_map[l, u]
+
+     if not maxls > 0:
+         raise ValueError('maxls must be positive.')
+
+     x = array(x0, float64)
+     f = array(0.0, float64)
+     g = zeros((n,), float64)
+     wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
+     iwa = zeros(3*n, fortran_int)
+     task = zeros(1, 'S60')
+     csave = zeros(1, 'S60')
+     lsave = zeros(4, fortran_int)
+     isave = zeros(44, fortran_int)
+     dsave = zeros(29, float64)
+
+     task[:] = 'START'
+
+     n_iterations = 0
+
+     while 1:
+         # g may become float32 if a user provides a function that calculates
+         # the Jacobian in float32 (see gh-18730). The underlying Fortran code
+         # expects float64, so upcast it
+         g = g.astype(np.float64)
+         # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
+         _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
+                        pgtol, wa, iwa, task, iprint, csave, lsave,
+                        isave, dsave, maxls)
+         task_str = task.tobytes()
+         if task_str.startswith(b'FG'):
+             # The minimization routine wants f and g at the current x.
+             # Note that interruptions due to maxfun are postponed
+             # until the completion of the current minimization iteration.
+             # Overwrite f and g:
+             f, g = func_and_grad(x)
+         elif task_str.startswith(b'NEW_X'):
+             # new iteration
+             n_iterations += 1
+
+             intermediate_result = OptimizeResult(x=x, fun=f)
+             if _call_callback_maybe_halt(callback, intermediate_result):
+                 task[:] = 'STOP: CALLBACK REQUESTED HALT'
+             if n_iterations >= maxiter:
+                 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
+             elif sf.nfev > maxfun:
+                 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
+                            'EXCEEDS LIMIT')
+         else:
+             break
+
+     task_str = task.tobytes().strip(b'\x00').strip()
+     if task_str.startswith(b'CONV'):
+         warnflag = 0
+     elif sf.nfev > maxfun or n_iterations >= maxiter:
+         warnflag = 1
+     else:
+         warnflag = 2
+
+     # These two portions of the workspace are described in the mainlb
+     # subroutine in lbfgsb.f. See line 363.
+     s = wa[0: m*n].reshape(m, n)
+     y = wa[m*n: 2*m*n].reshape(m, n)
+
+     # See lbfgsb.f line 160 for this portion of the workspace.
+     # isave(31) = the total number of BFGS updates prior to the current iteration;
+     n_bfgs_updates = isave[30]
+
+     n_corrs = min(n_bfgs_updates, maxcor)
+     hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
+
+     task_str = task_str.decode()
+     return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
+                           njev=sf.ngev,
+                           nit=n_iterations, status=warnflag, message=task_str,
+                           x=x, success=(warnflag == 0), hess_inv=hess_inv)
+
+
+ class LbfgsInvHessProduct(LinearOperator):
+     """Linear operator for the L-BFGS approximate inverse Hessian.
+
+     This operator computes the product of a vector with the approximate inverse
+     of the Hessian of the objective function, using the L-BFGS limited
+     memory approximation to the inverse Hessian, accumulated during the
+     optimization.
+
+     Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
+     interface.
+
+     Parameters
+     ----------
+     sk : array_like, shape=(n_corr, n)
+         Array of `n_corr` most recent updates to the solution vector.
+         (See [1]).
+     yk : array_like, shape=(n_corr, n)
+         Array of `n_corr` most recent updates to the gradient. (See [1]).
+
+     References
+     ----------
+     .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
+        storage." Mathematics of computation 35.151 (1980): 773-782.
+
+     """
+
+     def __init__(self, sk, yk):
+         """Construct the operator."""
+         if sk.shape != yk.shape or sk.ndim != 2:
+             raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
+         n_corrs, n = sk.shape
+
+         super().__init__(dtype=np.float64, shape=(n, n))
+
+         self.sk = sk
+         self.yk = yk
+         self.n_corrs = n_corrs
+         self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
+
+     def _matvec(self, x):
+         """Efficient matrix-vector multiply with the BFGS matrices.
+
+         This calculation is described in Section (4) of [1].
+
+         Parameters
+         ----------
+         x : ndarray
+             An array with shape (n,) or (n,1).
+
+         Returns
+         -------
+         y : ndarray
+             The matrix-vector product
+
+         """
+         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+         q = np.array(x, dtype=self.dtype, copy=True)
+         if q.ndim == 2 and q.shape[1] == 1:
+             q = q.reshape(-1)
+
+         alpha = np.empty(n_corrs)
+
+         # First loop of the two-loop recursion (most recent pair first).
+         for i in range(n_corrs-1, -1, -1):
+             alpha[i] = rho[i] * np.dot(s[i], q)
+             q = q - alpha[i]*y[i]
+
+         r = q
+         # Second loop (oldest pair first).
+         for i in range(n_corrs):
+             beta = rho[i] * np.dot(y[i], r)
+             r = r + s[i] * (alpha[i] - beta)
+
+         return r
+
+     def todense(self):
+         """Return a dense array representation of this operator.
+
+         Returns
+         -------
+         arr : ndarray, shape=(n, n)
+             An array with the same shape and containing
+             the same data represented by this `LinearOperator`.
+
+         """
+         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+         I = np.eye(*self.shape, dtype=self.dtype)
+         Hk = I
+
+         for i in range(n_corrs):
+             A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
+             A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
+
+             Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
+                                                s[i][np.newaxis, :])
+         return Hk
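
The two docstrings above both state the relation ``ftol = factr * numpy.finfo(float).eps``, and `_minimize_lbfgsb` returns its inverse-Hessian approximation as a `LbfgsInvHessProduct`. A small sketch of both points, using only public `scipy.optimize` APIs (`rosen`/`rosen_der` as a stock test problem); it is an illustration, not part of the file:

import numpy as np
from scipy.optimize import minimize, fmin_l_bfgs_b, rosen, rosen_der

x0 = np.array([-1.2, 1.0])
factr = 1e7

# fmin_l_bfgs_b takes factr; minimize(method='L-BFGS-B') takes ftol.
x_opt, f_opt, info = fmin_l_bfgs_b(rosen, x0, fprime=rosen_der, factr=factr)
res = minimize(rosen, x0, jac=rosen_der, method='L-BFGS-B',
               options={'ftol': factr * np.finfo(float).eps})
assert np.allclose(x_opt, res.x, atol=1e-6)

# hess_inv is a LinearOperator built from the stored (s, y) pairs: matvec
# runs the two-loop recursion, while todense() expands the same operator.
v = np.ones(2)
assert np.allclose(res.hess_inv.matvec(v), res.hess_inv.todense() @ v)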
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linesearch.py ADDED
@@ -0,0 +1,896 @@
+ """
+ Functions
+ ---------
+ .. autosummary::
+    :toctree: generated/
+
+    line_search_armijo
+    line_search_wolfe1
+    line_search_wolfe2
+    scalar_search_wolfe1
+    scalar_search_wolfe2
+
+ """
+ from warnings import warn
+
+ from ._dcsrch import DCSRCH
+ import numpy as np
+
+ __all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
+            'scalar_search_wolfe1', 'scalar_search_wolfe2',
+            'line_search_armijo']
+
+
+ class LineSearchWarning(RuntimeWarning):
+     pass
+
+
+ def _check_c1_c2(c1, c2):
+     if not (0 < c1 < c2 < 1):
+         raise ValueError("'c1' and 'c2' do not satisfy "
+                          "'0 < c1 < c2 < 1'.")
+
+
+ #------------------------------------------------------------------------------
+ # Minpack's Wolfe line and scalar searches
+ #------------------------------------------------------------------------------
+
+ def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
+                        old_fval=None, old_old_fval=None,
+                        args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
+                        xtol=1e-14):
+     """
+     As `scalar_search_wolfe1` but do a line search to direction `pk`
+
+     Parameters
+     ----------
+     f : callable
+         Function `f(x)`
+     fprime : callable
+         Gradient of `f`
+     xk : array_like
+         Current point
+     pk : array_like
+         Search direction
+     gfk : array_like, optional
+         Gradient of `f` at point `xk`
+     old_fval : float, optional
+         Value of `f` at point `xk`
+     old_old_fval : float, optional
+         Value of `f` at point preceding `xk`
+
+     The rest of the parameters are the same as for `scalar_search_wolfe1`.
+
+     Returns
+     -------
+     stp, f_count, g_count, fval, old_fval
+         As in `scalar_search_wolfe1`
+     gval : array
+         Gradient of `f` at the final point
+
+     Notes
+     -----
+     Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
+
+     """
+     if gfk is None:
+         gfk = fprime(xk, *args)
+
+     gval = [gfk]
+     gc = [0]
+     fc = [0]
+
+     def phi(s):
+         fc[0] += 1
+         return f(xk + s*pk, *args)
+
+     def derphi(s):
+         gval[0] = fprime(xk + s*pk, *args)
+         gc[0] += 1
+         return np.dot(gval[0], pk)
+
+     derphi0 = np.dot(gfk, pk)
+
+     stp, fval, old_fval = scalar_search_wolfe1(
+             phi, derphi, old_fval, old_old_fval, derphi0,
+             c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
+
+     return stp, fc[0], gc[0], fval, old_fval, gval[0]
+
+
+ def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
+                          c1=1e-4, c2=0.9,
+                          amax=50, amin=1e-8, xtol=1e-14):
+     """
+     Scalar function search for alpha that satisfies strong Wolfe conditions
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Parameters
+     ----------
+     phi : callable phi(alpha)
+         Function at point `alpha`
+     derphi : callable phi'(alpha)
+         Objective function derivative. Returns a scalar.
+     phi0 : float, optional
+         Value of phi at 0
+     old_phi0 : float, optional
+         Value of phi at previous point
+     derphi0 : float, optional
+         Value of derphi at 0
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax, amin : float, optional
+         Maximum and minimum step size
+     xtol : float, optional
+         Relative tolerance for an acceptable step.
+
+     Returns
+     -------
+     alpha : float
+         Step size, or None if no suitable step was found
+     phi : float
+         Value of `phi` at the new point `alpha`
+     phi0 : float
+         Value of `phi` at `alpha=0`
+
+     Notes
+     -----
+     Uses routine DCSRCH from MINPACK.
+
+     Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described in [1]_.
+
+     References
+     ----------
+
+     .. [1] Nocedal, J., & Wright, S. J. (2006). Numerical optimization.
+        In Springer Series in Operations Research and Financial Engineering.
+        (Springer Series in Operations Research and Financial Engineering).
+        Springer Nature.
+
+     """
+     _check_c1_c2(c1, c2)
+
+     if phi0 is None:
+         phi0 = phi(0.)
+     if derphi0 is None:
+         derphi0 = derphi(0.)
+
+     if old_phi0 is not None and derphi0 != 0:
+         alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+         if alpha1 < 0:
+             alpha1 = 1.0
+     else:
+         alpha1 = 1.0
+
+     maxiter = 100
+
+     dcsrch = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
+     stp, phi1, phi0, task = dcsrch(
+         alpha1, phi0=phi0, derphi0=derphi0, maxiter=maxiter
+     )
+
+     return stp, phi1, phi0
+
+
+ line_search = line_search_wolfe1
+
+
+ #------------------------------------------------------------------------------
+ # Pure-Python Wolfe line and scalar searches
+ #------------------------------------------------------------------------------
+
+ # Note: `line_search_wolfe2` is the public `scipy.optimize.line_search`
+
+ def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
+                        old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
+                        extra_condition=None, maxiter=10):
+     """Find alpha that satisfies strong Wolfe conditions.
+
+     Parameters
+     ----------
+     f : callable f(x,*args)
+         Objective function.
+     myfprime : callable f'(x,*args)
+         Objective function gradient.
+     xk : ndarray
+         Starting point.
+     pk : ndarray
+         Search direction. The search direction must be a descent direction
+         for the algorithm to converge.
+     gfk : ndarray, optional
+         Gradient value for x=xk (xk being the current parameter
+         estimate). Will be recomputed if omitted.
+     old_fval : float, optional
+         Function value for x=xk. Will be recomputed if omitted.
+     old_old_fval : float, optional
+         Function value for the point preceding x=xk.
+     args : tuple, optional
+         Additional arguments passed to objective function.
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax : float, optional
+         Maximum step size
+     extra_condition : callable, optional
+         A callable of the form ``extra_condition(alpha, x, f, g)``
+         returning a boolean. Arguments are the proposed step ``alpha``
+         and the corresponding ``x``, ``f`` and ``g`` values. The line search
+         accepts the value of ``alpha`` only if this
+         callable returns ``True``. If the callable returns ``False``
+         for the step length, the algorithm will continue with
+         new iterates. The callable is only called for iterates
+         satisfying the strong Wolfe conditions.
+     maxiter : int, optional
+         Maximum number of iterations to perform.
+
+     Returns
+     -------
+     alpha : float or None
+         Alpha for which ``x_new = x0 + alpha * pk``,
+         or None if the line search algorithm did not converge.
+     fc : int
+         Number of function evaluations made.
+     gc : int
+         Number of gradient evaluations made.
+     new_fval : float or None
+         New function value ``f(x_new)=f(x0+alpha*pk)``,
+         or None if the line search algorithm did not converge.
+     old_fval : float
+         Old function value ``f(x0)``.
+     new_slope : float or None
+         The local slope along the search direction at the
+         new value ``<myfprime(x_new), pk>``,
+         or None if the line search algorithm did not converge.
+
+
+     Notes
+     -----
+     Uses the line search algorithm to enforce strong Wolfe
+     conditions. See Wright and Nocedal, 'Numerical Optimization',
+     1999, pp. 59-61.
+
+     The search direction `pk` must be a descent direction (e.g.
+     ``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
+     conditions. If the search direction is not a descent direction (e.g.
+     ``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from scipy.optimize import line_search
+
+     An objective function and its gradient are defined.
+
+     >>> def obj_func(x):
+     ...     return (x[0])**2+(x[1])**2
+     >>> def obj_grad(x):
+     ...     return [2*x[0], 2*x[1]]
+
+     We can find alpha that satisfies strong Wolfe conditions.
+
+     >>> start_point = np.array([1.8, 1.7])
+     >>> search_gradient = np.array([-1.0, -1.0])
+     >>> line_search(obj_func, obj_grad, start_point, search_gradient)
+     (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
+
+     """
+     fc = [0]
+     gc = [0]
+     gval = [None]
+     gval_alpha = [None]
+
+     def phi(alpha):
+         fc[0] += 1
+         return f(xk + alpha * pk, *args)
+
+     fprime = myfprime
+
+     def derphi(alpha):
+         gc[0] += 1
+         gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
+         gval_alpha[0] = alpha
+         return np.dot(gval[0], pk)
+
+     if gfk is None:
+         gfk = fprime(xk, *args)
+     derphi0 = np.dot(gfk, pk)
+
+     if extra_condition is not None:
+         # Add the current gradient as argument, to avoid needless
+         # re-evaluation
+         def extra_condition2(alpha, phi):
+             if gval_alpha[0] != alpha:
+                 derphi(alpha)
+             x = xk + alpha * pk
+             return extra_condition(alpha, x, phi, gval[0])
+     else:
+         extra_condition2 = None
+
+     alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
+             phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
+             extra_condition2, maxiter=maxiter)
+
+     if derphi_star is None:
+         warn('The line search algorithm did not converge',
+              LineSearchWarning, stacklevel=2)
+     else:
+         # derphi_star is a number (derphi) -- so use the most recently
+         # calculated gradient used in computing it, derphi = gfk*pk.
+         # This is the gradient at the next step; no need to compute it
+         # again in the outer loop.
+         derphi_star = gval[0]
+
+     return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
+
+
+ def scalar_search_wolfe2(phi, derphi, phi0=None,
+                          old_phi0=None, derphi0=None,
+                          c1=1e-4, c2=0.9, amax=None,
+                          extra_condition=None, maxiter=10):
+     """Find alpha that satisfies strong Wolfe conditions.
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Parameters
+     ----------
+     phi : callable phi(alpha)
+         Objective scalar function.
+     derphi : callable phi'(alpha)
+         Objective function derivative. Returns a scalar.
+     phi0 : float, optional
+         Value of phi at 0.
+     old_phi0 : float, optional
+         Value of phi at previous point.
+     derphi0 : float, optional
+         Value of derphi at 0
+     c1 : float, optional
+         Parameter for Armijo condition rule.
+     c2 : float, optional
+         Parameter for curvature condition rule.
+     amax : float, optional
+         Maximum step size.
+     extra_condition : callable, optional
+         A callable of the form ``extra_condition(alpha, phi_value)``
+         returning a boolean. The line search accepts the value
+         of ``alpha`` only if this callable returns ``True``.
+         If the callable returns ``False`` for the step length,
+         the algorithm will continue with new iterates.
+         The callable is only called for iterates satisfying
+         the strong Wolfe conditions.
+     maxiter : int, optional
+         Maximum number of iterations to perform.
+
+     Returns
+     -------
+     alpha_star : float or None
+         Best alpha, or None if the line search algorithm did not converge.
+     phi_star : float
+         phi at alpha_star.
+     phi0 : float
+         phi at 0.
+     derphi_star : float or None
+         derphi at alpha_star, or None if the line search algorithm
+         did not converge.
+
+     Notes
+     -----
+     Uses the line search algorithm to enforce strong Wolfe
+     conditions. See Wright and Nocedal, 'Numerical Optimization',
+     1999, pp. 59-61.
+
+     """
+     _check_c1_c2(c1, c2)
+
+     if phi0 is None:
+         phi0 = phi(0.)
+
+     if derphi0 is None:
+         derphi0 = derphi(0.)
+
+     alpha0 = 0
+     if old_phi0 is not None and derphi0 != 0:
+         alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+     else:
+         alpha1 = 1.0
+
+     if alpha1 < 0:
+         alpha1 = 1.0
+
+     if amax is not None:
+         alpha1 = min(alpha1, amax)
+
+     phi_a1 = phi(alpha1)
+     #derphi_a1 = derphi(alpha1) evaluated below
+
+     phi_a0 = phi0
+     derphi_a0 = derphi0
+
+     if extra_condition is None:
+         def extra_condition(alpha, phi):
+             return True
+
+     for i in range(maxiter):
+         if alpha1 == 0 or (amax is not None and alpha0 > amax):
+             # alpha1 == 0: This shouldn't happen. Perhaps the increment has
+             # slipped below machine precision?
+             alpha_star = None
+             phi_star = phi0
+             phi0 = old_phi0
+             derphi_star = None
+
+             if alpha1 == 0:
+                 msg = 'Rounding errors prevent the line search from converging'
+             else:
+                 msg = "The line search algorithm could not find a solution " + \
+                       "less than or equal to amax: %s" % amax
+
+             warn(msg, LineSearchWarning, stacklevel=2)
+             break
+
+         not_first_iteration = i > 0
+         if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
+            ((phi_a1 >= phi_a0) and not_first_iteration):
+             alpha_star, phi_star, derphi_star = \
+                         _zoom(alpha0, alpha1, phi_a0,
+                               phi_a1, derphi_a0, phi, derphi,
+                               phi0, derphi0, c1, c2, extra_condition)
+             break
+
+         derphi_a1 = derphi(alpha1)
+         if (abs(derphi_a1) <= -c2*derphi0):
+             if extra_condition(alpha1, phi_a1):
+                 alpha_star = alpha1
+                 phi_star = phi_a1
+                 derphi_star = derphi_a1
+                 break
+
+         if (derphi_a1 >= 0):
+             alpha_star, phi_star, derphi_star = \
+                         _zoom(alpha1, alpha0, phi_a1,
+                               phi_a0, derphi_a1, phi, derphi,
+                               phi0, derphi0, c1, c2, extra_condition)
+             break
+
+         alpha2 = 2 * alpha1  # increase by factor of two on each iteration
+         if amax is not None:
+             alpha2 = min(alpha2, amax)
+         alpha0 = alpha1
+         alpha1 = alpha2
+         phi_a0 = phi_a1
+         phi_a1 = phi(alpha1)
+         derphi_a0 = derphi_a1
+
+     else:
+         # stopping test maxiter reached
+         alpha_star = alpha1
+         phi_star = phi_a1
+         derphi_star = None
+         warn('The line search algorithm did not converge',
+              LineSearchWarning, stacklevel=2)
+
+     return alpha_star, phi_star, phi0, derphi_star
+
+
+ def _cubicmin(a, fa, fpa, b, fb, c, fc):
+     """
+     Finds the minimizer for a cubic polynomial that goes through the
+     points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
+
+     If no minimizer can be found, return None.
+
+     """
+     # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
+
+     with np.errstate(divide='raise', over='raise', invalid='raise'):
+         try:
+             C = fpa
+             db = b - a
+             dc = c - a
+             denom = (db * dc) ** 2 * (db - dc)
+             d1 = np.empty((2, 2))
+             d1[0, 0] = dc ** 2
+             d1[0, 1] = -db ** 2
+             d1[1, 0] = -dc ** 3
+             d1[1, 1] = db ** 3
+             [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
+                                             fc - fa - C * dc]).flatten())
+             A /= denom
+             B /= denom
+             radical = B * B - 3 * A * C
+             xmin = a + (-B + np.sqrt(radical)) / (3 * A)
+         except ArithmeticError:
+             return None
+     if not np.isfinite(xmin):
+         return None
+     return xmin
+
+
+ def _quadmin(a, fa, fpa, b, fb):
+     """
+     Finds the minimizer for a quadratic polynomial that goes through
+     the points (a,fa), (b,fb) with derivative at a of fpa.
+
+     """
+     # f(x) = B*(x-a)^2 + C*(x-a) + D
+     with np.errstate(divide='raise', over='raise', invalid='raise'):
+         try:
+             D = fa
+             C = fpa
+             db = b - a * 1.0
+             B = (fb - D - C * db) / (db * db)
+             xmin = a - C / (2.0 * B)
+         except ArithmeticError:
+             return None
+     if not np.isfinite(xmin):
+         return None
+     return xmin
+
+
+ def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
+           phi, derphi, phi0, derphi0, c1, c2, extra_condition):
+     """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.
+
+     Part of the optimization algorithm in `scalar_search_wolfe2`.
+
+     Notes
+     -----
+     Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
+     'Numerical Optimization', 1999, pp. 61.
+
+     """
+
+     maxiter = 10
+     i = 0
+     delta1 = 0.2  # cubic interpolant check
+     delta2 = 0.1  # quadratic interpolant check
+     phi_rec = phi0
+     a_rec = 0
+     while True:
+         # interpolate to find a trial step length between a_lo and a_hi.
+         # We need to choose the interpolation here: use cubic
+         # interpolation, and then if the result is within delta *
+         # dalpha or outside of the interval bounded by a_lo or a_hi
+         # then use quadratic interpolation; if the result is still too
+         # close, then use bisection
+
+         dalpha = a_hi - a_lo
+         if dalpha < 0:
+             a, b = a_hi, a_lo
+         else:
+             a, b = a_lo, a_hi
+
+         # minimizer of cubic interpolant
+         # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
+         #
+         # if the result is too close to the end points (or out of the
+         # interval), then use quadratic interpolation with phi_lo,
+         # derphi_lo and phi_hi; if the result is still too close to the
+         # end points (or out of the interval) then use bisection
+
+         if (i > 0):
+             cchk = delta1 * dalpha
+             a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
+                             a_rec, phi_rec)
+         if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
+             qchk = delta2 * dalpha
+             a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
+             if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
+                 a_j = a_lo + 0.5*dalpha
+
+         # Check new value of a_j
+
+         phi_aj = phi(a_j)
+         if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
+             phi_rec = phi_hi
+             a_rec = a_hi
+             a_hi = a_j
+             phi_hi = phi_aj
+         else:
+             derphi_aj = derphi(a_j)
+             if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
+                 a_star = a_j
+                 val_star = phi_aj
+                 valprime_star = derphi_aj
+                 break
+             if derphi_aj*(a_hi - a_lo) >= 0:
+                 phi_rec = phi_hi
+                 a_rec = a_hi
+                 a_hi = a_lo
+                 phi_hi = phi_lo
+             else:
+                 phi_rec = phi_lo
+                 a_rec = a_lo
+             a_lo = a_j
+             phi_lo = phi_aj
+             derphi_lo = derphi_aj
+         i += 1
+         if (i > maxiter):
+             # Failed to find a conforming step size
+             a_star = None
+             val_star = None
+             valprime_star = None
+             break
+     return a_star, val_star, valprime_star
+
+
+ #------------------------------------------------------------------------------
+ # Armijo line and scalar searches
+ #------------------------------------------------------------------------------
+
+ def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+     """Minimize over alpha, the function ``f(xk + alpha*pk)``.
+
+     Parameters
+     ----------
+     f : callable
+         Function to be minimized.
+     xk : array_like
+         Current point.
+     pk : array_like
+         Search direction.
+     gfk : array_like
+         Gradient of `f` at point `xk`.
+     old_fval : float
+         Value of `f` at point `xk`.
+     args : tuple, optional
+         Optional arguments.
+     c1 : float, optional
+         Value to control stopping criterion.
+     alpha0 : scalar, optional
+         Value of `alpha` at start of the optimization.
+
+     Returns
+     -------
+     alpha
+     f_count
+     f_val_at_alpha
+
+     Notes
+     -----
+     Uses the interpolation algorithm (Armijo backtracking) as suggested by
+     Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57
+
+     """
+     xk = np.atleast_1d(xk)
+     fc = [0]
+
+     def phi(alpha1):
+         fc[0] += 1
+         return f(xk + alpha1*pk, *args)
+
+     if old_fval is None:
+         phi0 = phi(0.)
+     else:
+         phi0 = old_fval  # compute f(xk) -- done in past loop
+
+     derphi0 = np.dot(gfk, pk)
+     alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
+                                        alpha0=alpha0)
+     return alpha, fc[0], phi1
+
+
+ def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+     """
+     Compatibility wrapper for `line_search_armijo`
+     """
+     r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
+                            alpha0=alpha0)
+     return r[0], r[1], 0, r[2]
+
+
+ def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
+     """Minimize over alpha, the function ``phi(alpha)``.
+
+     Uses the interpolation algorithm (Armijo backtracking) as suggested by
+     Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57
+
+     alpha > 0 is assumed to be a descent direction.
+
+     Returns
+     -------
+     alpha
+     phi1
+
+     """
+     phi_a0 = phi(alpha0)
+     if phi_a0 <= phi0 + c1*alpha0*derphi0:
+         return alpha0, phi_a0
+
+     # Otherwise, compute the minimizer of a quadratic interpolant:
+
+     alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
+     phi_a1 = phi(alpha1)
+
+     if (phi_a1 <= phi0 + c1*alpha1*derphi0):
+         return alpha1, phi_a1
+
+     # Otherwise, loop with cubic interpolation until we find an alpha which
+     # satisfies the first Wolfe condition (since we are backtracking, we will
+     # assume that the value of alpha is not too small and satisfies the second
+     # condition).
+
+     while alpha1 > amin:  # we are assuming alpha>0 is a descent direction
+         factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
+         a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
+             alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
+         a = a / factor
+         b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
+             alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
+         b = b / factor
+
+         alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
+         phi_a2 = phi(alpha2)
+
+         if (phi_a2 <= phi0 + c1*alpha2*derphi0):
+             return alpha2, phi_a2
+
+         if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
+             alpha2 = alpha1 / 2.0
+
+         alpha0 = alpha1
+         alpha1 = alpha2
+         phi_a0 = phi_a1
+         phi_a1 = phi_a2
+
+     # Failed to find a suitable step length
+     return None, phi_a1
+
+
+ #------------------------------------------------------------------------------
+ # Non-monotone line search for DF-SANE
+ #------------------------------------------------------------------------------
+
+ def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
+                                   gamma=1e-4, tau_min=0.1, tau_max=0.5):
+     """
+     Nonmonotone backtracking line search as described in [1]_
+
+     Parameters
+     ----------
+     f : callable
+         Function returning a tuple ``(f, F)`` where ``f`` is the value
+         of a merit function and ``F`` the residual.
+     x_k : ndarray
+         Initial position.
+     d : ndarray
+         Search direction.
+     prev_fs : list of float
+         List of previous merit function values. Should have ``len(prev_fs) <= M``
+         where ``M`` is the nonmonotonicity window parameter.
+     eta : float
+         Allowed merit function increase, see [1]_
+     gamma, tau_min, tau_max : float, optional
+         Search parameters, see [1]_
+
+     Returns
+     -------
+     alpha : float
+         Step length
+     xp : ndarray
+         Next position
+     fp : float
+         Merit function value at next position
+     Fp : ndarray
+         Residual at next position
+
+     References
+     ----------
+     .. [1] "Spectral residual method without gradient information for solving
+        large-scale nonlinear systems of equations." W. La Cruz,
+        J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
+
+     """
+     f_k = prev_fs[-1]
+     f_bar = max(prev_fs)
+
+     alpha_p = 1
+     alpha_m = 1
+     alpha = 1
+
+     while True:
+         xp = x_k + alpha_p * d
+         fp, Fp = f(xp)
+
+         if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
+             alpha = alpha_p
+             break
+
+         alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
+
+         xp = x_k - alpha_m * d
+         fp, Fp = f(xp)
+
+         if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
+             alpha = -alpha_m
+             break
+
+         alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
+
+         alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
+         alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
+
+     return alpha, xp, fp, Fp
+
+
+ def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
+                                    gamma=1e-4, tau_min=0.1, tau_max=0.5,
+                                    nu=0.85):
+     """
+     Nonmonotone line search from [1]_
+
+     Parameters
+     ----------
+     f : callable
+         Function returning a tuple ``(f, F)`` where ``f`` is the value
+         of a merit function and ``F`` the residual.
+     x_k : ndarray
+         Initial position.
+     d : ndarray
+         Search direction.
+     f_k : float
+         Initial merit function value.
+     C, Q : float
+         Control parameters. On the first iteration, give values
+         Q=1.0, C=f_k
+     eta : float
+         Allowed merit function increase, see [1]_
+     nu, gamma, tau_min, tau_max : float, optional
+         Search parameters, see [1]_
+
+     Returns
+     -------
+     alpha : float
+         Step length
+     xp : ndarray
+         Next position
+     fp : float
+         Merit function value at next position
+     Fp : ndarray
+         Residual at next position
+     C : float
+         New value for the control parameter C
+     Q : float
+         New value for the control parameter Q
+
+     References
+     ----------
+     .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
+        search and its application to the spectral residual
+        method'', IMA J. Numer. Anal. 29, 814 (2009).
+
+     """
+     alpha_p = 1
+     alpha_m = 1
+     alpha = 1
+
+     while True:
+         xp = x_k + alpha_p * d
+         fp, Fp = f(xp)
+
+         if fp <= C + eta - gamma * alpha_p**2 * f_k:
+             alpha = alpha_p
+             break
+
+         alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
+
+         xp = x_k - alpha_m * d
+         fp, Fp = f(xp)
+
+         if fp <= C + eta - gamma * alpha_m**2 * f_k:
+             alpha = -alpha_m
+             break
+
+         alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
+
+         alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
+         alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
+
+     # Update C and Q
+     Q_next = nu * Q + 1
+     C = (nu * Q * (C + eta) + fp) / Q_next
+     Q = Q_next
+
+     return alpha, xp, fp, Fp, C, Q
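
Per the module comment above, `line_search_wolfe2` is exported as the public `scipy.optimize.line_search`. A minimal sketch checking the two strong Wolfe conditions on a simple quadratic (the test function and tolerance values are illustrative assumptions, not from the file):

import numpy as np
from scipy.optimize import line_search

def f(x):
    return float(x @ x)

def grad(x):
    return 2 * x

xk = np.array([1.8, 1.7])
pk = -grad(xk)                  # a descent direction, as the docstring requires
alpha, fc, gc, fnew, fold, _ = line_search(f, grad, xk, pk)

c1, c2 = 1e-4, 0.9              # the defaults
slope0 = grad(xk) @ pk
gnew = grad(xk + alpha * pk)
assert fnew <= f(xk) + c1 * alpha * slope0     # sufficient decrease (Armijo)
assert abs(gnew @ pk) <= -c2 * slope0          # curvature condition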
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog.py ADDED
@@ -0,0 +1,716 @@
1
+ """
2
+ A top-level linear programming interface.
3
+
4
+ .. versionadded:: 0.15.0
5
+
6
+ Functions
7
+ ---------
8
+ .. autosummary::
9
+ :toctree: generated/
10
+
11
+ linprog
12
+ linprog_verbose_callback
13
+ linprog_terse_callback
14
+
15
+ """
16
+
17
+ import numpy as np
18
+
19
+ from ._optimize import OptimizeResult, OptimizeWarning
20
+ from warnings import warn
21
+ from ._linprog_highs import _linprog_highs
22
+ from ._linprog_ip import _linprog_ip
23
+ from ._linprog_simplex import _linprog_simplex
24
+ from ._linprog_rs import _linprog_rs
25
+ from ._linprog_doc import (_linprog_highs_doc, _linprog_ip_doc, # noqa: F401
26
+ _linprog_rs_doc, _linprog_simplex_doc,
27
+ _linprog_highs_ipm_doc, _linprog_highs_ds_doc)
28
+ from ._linprog_util import (
29
+ _parse_linprog, _presolve, _get_Abc, _LPProblem, _autoscale,
30
+ _postsolve, _check_result, _display_summary)
31
+ from copy import deepcopy
32
+
33
+ __all__ = ['linprog', 'linprog_verbose_callback', 'linprog_terse_callback']
34
+
35
+ __docformat__ = "restructuredtext en"
36
+
37
+ LINPROG_METHODS = [
38
+ 'simplex', 'revised simplex', 'interior-point', 'highs', 'highs-ds', 'highs-ipm'
39
+ ]
40
+
41
+
42
+ def linprog_verbose_callback(res):
43
+ """
44
+ A sample callback function demonstrating the linprog callback interface.
45
+ This callback produces detailed output to sys.stdout before each iteration
46
+ and after the final iteration of the simplex algorithm.
47
+
48
+ Parameters
49
+ ----------
50
+ res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
51
+
52
+ x : 1-D array
53
+ The independent variable vector which optimizes the linear
54
+ programming problem.
55
+ fun : float
56
+ Value of the objective function.
57
+ success : bool
58
+             True if the algorithm succeeded in finding an optimal solution.
+         slack : 1-D array
+             The values of the slack variables. Each slack variable corresponds
+             to an inequality constraint. If the slack is zero, then the
+             corresponding constraint is active.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints, that
+             is, ``b - A_eq @ x``.
+         phase : int
+             The phase of the optimization being executed. In phase 1 a basic
+             feasible solution is sought and the tableau ``T`` has an
+             additional row representing an alternate objective function.
+         status : int
+             An integer representing the exit status of the optimization::
+
+                 0 : Optimization terminated successfully
+                 1 : Iteration limit reached
+                 2 : Problem appears to be infeasible
+                 3 : Problem appears to be unbounded
+                 4 : Serious numerical difficulties encountered
+
+         nit : int
+             The number of iterations performed.
+         message : str
+             A string descriptor of the exit status of the optimization.
+     """
+     x = res['x']
+     fun = res['fun']
+     phase = res['phase']
+     status = res['status']
+     nit = res['nit']
+     message = res['message']
+     complete = res['complete']
+
+     saved_printoptions = np.get_printoptions()
+     np.set_printoptions(linewidth=500,
+                         formatter={'float': lambda x: f"{x: 12.4f}"})
+     if status:
+         print('--------- Simplex Early Exit -------\n')
+         print(f'The simplex method exited early with status {status:d}')
+         print(message)
+     elif complete:
+         print('--------- Simplex Complete --------\n')
+         print(f'Iterations required: {nit}')
+     else:
+         print(f'--------- Iteration {nit:d} ---------\n')
+
+     if nit > 0:
+         if phase == 1:
+             print('Current Pseudo-Objective Value:')
+         else:
+             print('Current Objective Value:')
+         print('f = ', fun)
+         print()
+         print('Current Solution Vector:')
+         print('x = ', x)
+         print()
+
+     np.set_printoptions(**saved_printoptions)
+
+
+ def linprog_terse_callback(res):
+     """
+     A sample callback function demonstrating the linprog callback interface.
+     This callback produces brief output to sys.stdout before each iteration
+     and after the final iteration of the simplex algorithm.
+
+     Parameters
+     ----------
+     res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+         x : 1-D array
+             The independent variable vector which optimizes the linear
+             programming problem.
+         fun : float
+             Value of the objective function.
+         success : bool
+             True if the algorithm succeeded in finding an optimal solution.
+         slack : 1-D array
+             The values of the slack variables. Each slack variable corresponds
+             to an inequality constraint. If the slack is zero, then the
+             corresponding constraint is active.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints, that
+             is, ``b - A_eq @ x``.
+         phase : int
+             The phase of the optimization being executed. In phase 1 a basic
+             feasible solution is sought and the tableau ``T`` has an
+             additional row representing an alternate objective function.
+         status : int
+             An integer representing the exit status of the optimization::
+
+                 0 : Optimization terminated successfully
+                 1 : Iteration limit reached
+                 2 : Problem appears to be infeasible
+                 3 : Problem appears to be unbounded
+                 4 : Serious numerical difficulties encountered
+
+         nit : int
+             The number of iterations performed.
+         message : str
+             A string descriptor of the exit status of the optimization.
+     """
+     nit = res['nit']
+     x = res['x']
+
+     if nit == 0:
+         print("Iter: X:")
+     print(f"{nit: <5d} ", end="")
+     print(x)
+
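+ # Illustrative usage sketch (editorial addition, not part of the original
+ # SciPy source): the callbacks above are passed to `linprog` through the
+ # `callback` keyword, which only the legacy methods support (the HiGHS
+ # methods do not, per the docstring below). Assuming a SciPy version in
+ # which `method='simplex'` is still available:
+ #
+ #     >>> from scipy.optimize import linprog
+ #     >>> c = [-1, 4]
+ #     >>> A_ub = [[-3, 1], [1, 2]]
+ #     >>> b_ub = [6, 4]
+ #     >>> res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='simplex',
+ #     ...               callback=linprog_terse_callback)  # one row per iteration
+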
+ def linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+             bounds=(0, None), method='highs', callback=None,
+             options=None, x0=None, integrality=None):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     - minimize ::
+
+         c @ x
+
+     - such that ::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None``. Other bounds can be
+     specified with ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable.
+         If a single tuple ``(min, max)`` is provided, then ``min`` and ``max``
+         will serve as bounds for all decision variables.
+         Use ``None`` to indicate that there is no bound. For instance, the
+         default bound ``(0, None)`` means that all decision variables are
+         non-negative, and the pair ``(None, None)`` means no bounds at all,
+         i.e., all variables are allowed to take any real value.
+     method : str, optional
+         The algorithm used to solve the standard form problem.
+         :ref:`'highs' <optimize.linprog-highs>` (default),
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
+         :ref:`'interior-point' <optimize.linprog-interior-point>` (legacy),
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>` (legacy),
+         and
+         :ref:`'simplex' <optimize.linprog-simplex>` (legacy) are supported.
+         The legacy methods are deprecated and will be removed in SciPy 1.11.0.
+     callback : callable, optional
+         If a callback function is provided, it will be called at least once per
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+         x : 1-D array
+             The current solution vector.
+         fun : float
+             The current value of the objective function ``c @ x``.
+         success : bool
+             ``True`` when the algorithm has completed successfully.
+         slack : 1-D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         phase : int
+             The phase of the algorithm being executed.
+         status : int
+             An integer representing the status of the algorithm.
+
+             ``0`` : Optimization proceeding nominally.
+
+             ``1`` : Iteration limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : Numerical difficulties encountered.
+
+         nit : int
+             The current iteration number.
+         message : str
+             A string descriptor of the algorithm status.
+
+         Callback functions are not currently supported by the HiGHS methods.
+
+     options : dict, optional
+         A dictionary of solver options. All methods accept the following
+         options:
+
+         maxiter : int
+             Maximum number of iterations to perform.
+             Default: see method-specific documentation.
+         disp : bool
+             Set to ``True`` to print convergence messages.
+             Default: ``False``.
+         presolve : bool
+             Set to ``False`` to disable automatic presolve.
+             Default: ``True``.
+
+         All methods except the HiGHS solvers also accept:
+
+         tol : float
+             A tolerance which determines when a residual is "close enough" to
+             zero to be considered exactly zero.
+         autoscale : bool
+             Set to ``True`` to automatically perform equilibration.
+             Consider using this option if the numerical values in the
+             constraints are separated by several orders of magnitude.
+             Default: ``False``.
+         rr : bool
+             Set to ``False`` to disable automatic redundancy removal.
+             Default: ``True``.
+         rr_method : string
+             Method used to identify and remove redundant rows from the
+             equality constraint matrix after presolve. For problems with
+             dense input, the available methods for redundancy removal are:
+
+             "SVD":
+                 Repeatedly performs singular value decomposition on
+                 the matrix, detecting redundant rows based on nonzeros
+                 in the left singular vectors that correspond with
+                 zero singular values. May be fast when the matrix is
+                 nearly full rank.
+             "pivot":
+                 Uses the algorithm presented in [5]_ to identify
+                 redundant rows.
+             "ID":
+                 Uses a randomized interpolative decomposition.
+                 Identifies columns of the matrix transpose not used in
+                 a full-rank interpolative decomposition of the matrix.
+             None:
+                 Uses "SVD" if the matrix is nearly full rank, that is,
+                 the difference between the matrix rank and the number
+                 of rows is less than five. If not, uses "pivot". The
+                 behavior of this default is subject to change without
+                 prior notice.
+
+             Default: None.
+             For problems with sparse input, this option is ignored, and the
+             pivot-based algorithm presented in [5]_ is used.
+
+         For method-specific options, see
+         :func:`show_options('linprog') <show_options>`.
+
+     x0 : 1-D array, optional
+         Guess values of the decision variables, which will be refined by
+         the optimization algorithm. This argument is currently used only by
+         the 'revised simplex' method, and can only be used if `x0` represents
+         a basic feasible solution.
+
+     integrality : 1-D array or int, optional
+         Indicates the type of integrality constraint on each decision
+         variable.
+
+         ``0`` : Continuous variable; no integrality constraint.
+
+         ``1`` : Integer variable; decision variable must be an integer
+         within `bounds`.
+
+         ``2`` : Semi-continuous variable; decision variable must be within
+         `bounds` or take value ``0``.
+
+         ``3`` : Semi-integer variable; decision variable must be an integer
+         within `bounds` or take value ``0``.
+
+         By default, all variables are continuous.
+
+         For mixed integrality constraints, supply an array of shape `c.shape`.
+         To infer a constraint on each decision variable from shorter inputs,
+         the argument will be broadcast to `c.shape` using `np.broadcast_to`.
+
+         This argument is currently used only by the ``'highs'`` method and
+         ignored otherwise.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields
+         below. Note that the return types of the fields may depend on whether
+         the optimization was successful, so it is recommended to check
+         `OptimizeResult.status` before relying on the other fields:
+
+         x : 1-D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1-D array
+             The (nominally positive) values of the slack variables,
+             ``b_ub - A_ub @ x``.
+         con : 1-D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : Numerical difficulties encountered.
+
+         nit : int
+             The total number of iterations performed in all phases.
+         message : str
+             A string descriptor of the exit status of the algorithm.
+
+     See Also
+     --------
+     show_options : Additional options accepted by the solvers.
+
+     Notes
+     -----
+     This section describes the available solvers that can be selected by the
+     'method' parameter.
+
+     `'highs-ds'` and
+     `'highs-ipm'` are interfaces to the
+     HiGHS simplex and interior-point method solvers [13]_, respectively.
+     `'highs'` (default) chooses between
+     the two automatically. These are the fastest linear
+     programming solvers in SciPy, especially for large, sparse problems;
+     which of these two is faster is problem-dependent.
+     The other solvers (`'interior-point'`, `'revised simplex'`, and
+     `'simplex'`) are legacy methods and will be removed in SciPy 1.11.0.
+
+     Method *highs-ds* is a wrapper of the C++ high performance dual
+     revised simplex implementation (HSOL) [13]_, [14]_. Method *highs-ipm*
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method *highs* chooses between the two automatically.
+     For new code involving `linprog`, we recommend explicitly choosing one of
+     these three method values.
+
+     .. versionadded:: 1.6.0
+
+     Method *interior-point* uses the primal-dual path following algorithm
+     as outlined in [4]_. This algorithm supports sparse constraint matrices
+     and is typically faster than the simplex methods, especially for large,
+     sparse problems. Note, however, that the solution returned may be slightly
+     less accurate than those of the simplex methods and will not, in general,
+     correspond with a vertex of the polytope defined by the constraints.
+
+     .. versionadded:: 1.0.0
+
+     Method *revised simplex* uses the revised simplex method as described in
+     [9]_, except that a factorization [11]_ of the basis matrix, rather than
+     its inverse, is efficiently maintained and used to solve the linear
+     systems at each iteration of the algorithm.
+
+     .. versionadded:: 1.3.0
+
+     Method *simplex* uses a traditional, full-tableau implementation of
+     Dantzig's simplex algorithm [1]_, [2]_ (*not* the
+     Nelder-Mead simplex). This algorithm is included for backwards
+     compatibility and educational purposes.
+
+     .. versionadded:: 0.15.0
+
+     Before applying *interior-point*, *revised simplex*, or *simplex*,
+     a presolve procedure based on [8]_ attempts
+     to identify trivial infeasibilities, trivial unboundedness, and potential
+     problem simplifications. Specifically, it checks for:
+
+     - rows of zeros in ``A_eq`` or ``A_ub``, representing trivial constraints;
+     - columns of zeros in ``A_eq`` `and` ``A_ub``, representing unconstrained
+       variables;
+     - column singletons in ``A_eq``, representing fixed variables; and
+     - column singletons in ``A_ub``, representing simple bounds.
+
+     If presolve reveals that the problem is unbounded (e.g. an unconstrained
+     and unbounded variable has negative cost) or infeasible (e.g., a row of
+     zeros in ``A_eq`` corresponds with a nonzero in ``b_eq``), the solver
+     terminates with the appropriate status code. Note that presolve terminates
+     as soon as any sign of unboundedness is detected; consequently, a problem
+     may be reported as unbounded when in reality the problem is infeasible
+     (but infeasibility has not been detected yet). Therefore, if it is
+     important to know whether the problem is actually infeasible, solve the
+     problem again with option ``presolve=False``.
+
+     If neither infeasibility nor unboundedness are detected in a single pass
+     of the presolve, bounds are tightened where possible and fixed
+     variables are removed from the problem. Then, linearly dependent rows
+     of the ``A_eq`` matrix are removed (unless they represent an
+     infeasibility) to avoid numerical difficulties in the primary solve
+     routine. Note that rows that are nearly linearly dependent (within a
+     prescribed tolerance) may also be removed, which can change the optimal
+     solution in rare cases. If this is a concern, eliminate redundancy from
+     your problem formulation and run with option ``rr=False`` or
+     ``presolve=False``.
+
+     Several potential improvements can be made here: additional presolve
+     checks outlined in [8]_ should be implemented, the presolve routine should
+     be run multiple times (until no further simplifications can be made), and
+     more of the efficiency improvements from [5]_ should be implemented in the
+     redundancy removal routines.
+
+     After presolve, the problem is transformed to standard form by converting
+     the (tightened) simple bounds to upper bound constraints, introducing
+     non-negative slack variables for inequality constraints, and expressing
+     unbounded variables as the difference between two non-negative variables.
+     Optionally, the problem is automatically scaled via equilibration [12]_.
+     The selected algorithm solves the standard form problem, and a
+     postprocessing routine converts the result to a solution to the original
+     problem.
+
+     References
+     ----------
+     .. [1] Dantzig, George B., Linear programming and extensions. Rand
+            Corporation Research Study, Princeton Univ. Press, Princeton, NJ,
+            1963.
+     .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+            Mathematical Programming", McGraw-Hill, Chapter 4.
+     .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
+            Mathematics of Operations Research (2), 1977: pp. 103-107.
+     .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior
+            point optimizer for linear programming: an implementation of the
+            homogeneous algorithm." High performance optimization. Springer US,
+            2000. 197-232.
+     .. [5] Andersen, Erling D. "Finding all linearly dependent rows in
+            large-scale linear programming." Optimization Methods and Software
+            6.3 (1995): 219-227.
+     .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
+            Programming based on Newton's Method." Unpublished Course Notes,
+            March 2004. Available 2/25/2017 at
+            https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
+     .. [7] Fourer, Robert. "Solving Linear Programs by Interior-Point
+            Methods." Unpublished Course Notes, August 26, 2005. Available
+            2/25/2017 at http://www.4er.org/CourseNotes/Book%20B/B-III.pdf
+     .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
+            programming." Mathematical Programming 71.2 (1995): 221-245.
+     .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
+            programming." Athena Scientific 1 (1997): 997.
+     .. [10] Andersen, Erling D., et al. Implementation of interior point
+             methods for large scale linear programming. HEC/Universite de
+             Geneve, 1996.
+     .. [11] Bartels, Richard H. "A stabilization of the simplex method."
+             Numerische Mathematik 16.5 (1971): 414-434.
+     .. [12] Tomlin, J. A. "On scaling linear programming problems."
+             Mathematical Programming Study 4 (1975): 146-166.
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+
+     Examples
+     --------
+     Consider the following problem:
+
+     .. math::
+
+         \min_{x_0, x_1} \ -x_0 + 4x_1 & \\
+         \mbox{such that} \ -3x_0 + x_1 & \leq 6,\\
+         -x_0 - 2x_1 & \geq -4,\\
+         x_1 & \geq -3.
+
+     The problem is not presented in the form accepted by `linprog`. This is
+     easily remedied by converting the "greater than" inequality
+     constraint to a "less than" inequality constraint by
+     multiplying both sides by a factor of :math:`-1`. Note also that the last
+     constraint is really the simple bound :math:`-3 \leq x_1 \leq \infty`.
+     Finally, since there are no bounds on :math:`x_0`, we must explicitly
+     specify the bounds :math:`-\infty \leq x_0 \leq \infty`, as the
+     default is for variables to be non-negative. After collecting coefficients
+     into arrays and tuples, the input for this problem is:
+
+     >>> from scipy.optimize import linprog
+     >>> c = [-1, 4]
+     >>> A = [[-3, 1], [1, 2]]
+     >>> b = [6, 4]
+     >>> x0_bounds = (None, None)
+     >>> x1_bounds = (-3, None)
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds])
+     >>> res.fun
+     -22.0
+     >>> res.x
+     array([10., -3.])
+     >>> res.message
+     'Optimization terminated successfully. (HiGHS Status 7: Optimal)'
+
+     The marginals (AKA dual values / shadow prices / Lagrange multipliers)
+     and residuals (slacks) are also available.
+
+     >>> res.ineqlin
+       residual: [ 3.900e+01  0.000e+00]
+      marginals: [-0.000e+00 -1.000e+00]
+
+     For example, because the marginal associated with the second inequality
+     constraint is -1, we expect the optimal value of the objective function
+     to decrease by ``eps`` if we add a small amount ``eps`` to the right hand
+     side of the second inequality constraint:
+
+     >>> eps = 0.05
+     >>> b[1] += eps
+     >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
+     -22.05
+
+     Also, because the residual on the first inequality constraint is 39, we
+     can decrease the right hand side of the first constraint by 39 without
+     affecting the optimal solution.
+
+     >>> b = [6, 4]  # reset to original values
+     >>> b[0] -= 39
+     >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
+     -22.0
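+
+     A minimal sketch of the ``integrality`` parameter (an editorial addition,
+     assuming a SciPy version in which the ``'highs'`` method supports
+     integrality constraints, i.e. 1.9 or later). Requiring both decision
+     variables to be integers is a one-line change; outputs are omitted
+     because they may vary with the solver version:
+
+     >>> b = [6, 4]  # reset to original values
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds],
+     ...               integrality=[1, 1])  # both variables integer-valued
+     >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds],
+     ...               integrality=1)       # scalar is broadcast to c.shape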
+
+     """
+
+     meth = method.lower()
+     methods = {"highs", "highs-ds", "highs-ipm",
+                "simplex", "revised simplex", "interior-point"}
+
+     if meth not in methods:
+         raise ValueError(f"Unknown solver '{method}'")
+
+     if x0 is not None and meth != "revised simplex":
+         warning_message = "x0 is used only when method is 'revised simplex'. "
+         warn(warning_message, OptimizeWarning, stacklevel=2)
+
+     if np.any(integrality) and not meth == "highs":
+         integrality = None
+         warning_message = ("Only `method='highs'` supports integer "
+                            "constraints. Ignoring `integrality`.")
+         warn(warning_message, OptimizeWarning, stacklevel=2)
+     elif np.any(integrality):
+         integrality = np.broadcast_to(integrality, np.shape(c))
+     else:
+         integrality = None
+
+     lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality)
+     lp, solver_options = _parse_linprog(lp, options, meth)
+     tol = solver_options.get('tol', 1e-9)
+
+     # Give unmodified problem to HiGHS
+     if meth.startswith('highs'):
+         if callback is not None:
+             raise NotImplementedError("HiGHS solvers do not support the "
+                                       "callback interface.")
+         highs_solvers = {'highs-ipm': 'ipm', 'highs-ds': 'simplex',
+                          'highs': None}
+
+         sol = _linprog_highs(lp, solver=highs_solvers[meth],
+                              **solver_options)
+         sol['status'], sol['message'] = (
+             _check_result(sol['x'], sol['fun'], sol['status'], sol['slack'],
+                           sol['con'], lp.bounds, tol, sol['message'],
+                           integrality))
+         sol['success'] = sol['status'] == 0
+         return OptimizeResult(sol)
+
+     warn(f"`method='{meth}'` is deprecated and will be removed in SciPy "
+          "1.11.0. Please use one of the HiGHS solvers (e.g. "
+          "`method='highs'`) in new code.", DeprecationWarning, stacklevel=2)
+
+     iteration = 0
+     complete = False  # will become True if solved in presolve
+     undo = []
+
+     # Keep the original arrays to calculate slack/residuals for original
+     # problem.
+     lp_o = deepcopy(lp)
+
+     # Solve trivial problem, eliminate variables, tighten bounds, etc.
+     rr_method = solver_options.pop('rr_method', None)  # need to pop these;
+     rr = solver_options.pop('rr', True)  # they're not passed to methods
+     c0 = 0  # we might get a constant term in the objective
+     if solver_options.pop('presolve', True):
+         (lp, c0, x, undo, complete, status, message) = _presolve(lp, rr,
+                                                                  rr_method,
+                                                                  tol)
+
+     C, b_scale = 1, 1  # for trivial unscaling if autoscale is not used
+     postsolve_args = (lp_o._replace(bounds=lp.bounds), undo, C, b_scale)
+
+     if not complete:
+         A, b, c, c0, x0 = _get_Abc(lp, c0)
+         if solver_options.pop('autoscale', False):
+             A, b, c, x0, C, b_scale = _autoscale(A, b, c, x0)
+             postsolve_args = postsolve_args[:-2] + (C, b_scale)
+
+         if meth == 'simplex':
+             x, status, message, iteration = _linprog_simplex(
+                 c, c0=c0, A=A, b=b, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+         elif meth == 'interior-point':
+             x, status, message, iteration = _linprog_ip(
+                 c, c0=c0, A=A, b=b, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+         elif meth == 'revised simplex':
+             x, status, message, iteration = _linprog_rs(
+                 c, c0=c0, A=A, b=b, x0=x0, callback=callback,
+                 postsolve_args=postsolve_args, **solver_options)
+
+     # Eliminate artificial variables, re-introduce presolved variables, etc.
+     disp = solver_options.get('disp', False)
+
+     x, fun, slack, con = _postsolve(x, postsolve_args, complete)
+
+     status, message = _check_result(x, fun, status, slack, con, lp_o.bounds,
+                                     tol, message, integrality)
+
+     if disp:
+         _display_summary(message, status, fun, iteration)
+
+     sol = {
+         'x': x,
+         'fun': fun,
+         'slack': slack,
+         'con': con,
+         'status': status,
+         'message': message,
+         'nit': iteration,
+         'success': status == 0}
+
+     return OptimizeResult(sol)
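+
+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # the legacy code path above pops `presolve`, `rr`, `rr_method`, and
+ # `autoscale` out of `options` before dispatching, so those switches are
+ # supplied as plain dictionary entries. Assuming a SciPy version in which
+ # `method='interior-point'` still exists:
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='interior-point',
+ #                   options={'presolve': False, 'rr': False,
+ #                            'autoscale': True, 'tol': 1e-9})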
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_doc.py ADDED
@@ -0,0 +1,1434 @@
+ """
+ Created on Sat Aug 22 19:49:17 2020
+
+ @author: matth
+ """
+
+
+ def _linprog_highs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+                        bounds=None, method='highs', callback=None,
+                        maxiter=None, disp=False, presolve=True,
+                        time_limit=None,
+                        dual_feasibility_tolerance=None,
+                        primal_feasibility_tolerance=None,
+                        ipm_optimality_tolerance=None,
+                        simplex_dual_edge_weight_strategy=None,
+                        mip_rel_gap=None,
+                        **unknown_options):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints using one of the HiGHS solvers.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     minimize::
+
+         c @ x
+
+     such that::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None`` unless specified with
+     ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable. Use ``None``
+         to indicate that there is no bound. By default, bounds are
+         ``(0, None)`` (all decision variables are non-negative).
+         If a single tuple ``(min, max)`` is provided, then ``min`` and
+         ``max`` will serve as bounds for all decision variables.
+     method : str
+
+         This is the method-specific documentation for 'highs', which chooses
+         automatically between
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>` and
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`. The legacy methods
+         :ref:`'interior-point' <optimize.linprog-interior-point>`,
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+         :ref:`'simplex' <optimize.linprog-simplex>`
+         are also available.
+     integrality : 1-D array or int, optional
+         Indicates the type of integrality constraint on each decision
+         variable.
+
+         ``0`` : Continuous variable; no integrality constraint.
+
+         ``1`` : Integer variable; decision variable must be an integer
+         within `bounds`.
+
+         ``2`` : Semi-continuous variable; decision variable must be within
+         `bounds` or take value ``0``.
+
+         ``3`` : Semi-integer variable; decision variable must be an integer
+         within `bounds` or take value ``0``.
+
+         By default, all variables are continuous.
+
+         For mixed integrality constraints, supply an array of shape `c.shape`.
+         To infer a constraint on each decision variable from shorter inputs,
+         the argument will be broadcast to `c.shape` using `np.broadcast_to`.
+
+         This argument is currently used only by the ``'highs'`` method and
+         ignored otherwise.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+         For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
+         include the number of crossover iterations. Default is the largest
+         possible value for an ``int`` on the platform.
+     disp : bool (default: ``False``)
+         Set to ``True`` if indicators of optimization status are to be
+         printed to the console during optimization.
+     presolve : bool (default: ``True``)
+         Presolve attempts to identify trivial infeasibilities,
+         identify trivial unboundedness, and simplify the problem before
+         sending it to the main solver. It is generally recommended
+         to keep the default setting ``True``; set to ``False`` if
+         presolve is to be disabled.
+     time_limit : float
+         The maximum time in seconds allotted to solve the problem;
+         default is the largest possible value for a ``double`` on the
+         platform.
+     dual_feasibility_tolerance : double (default: 1e-07)
+         Dual feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+         The minimum of this and ``primal_feasibility_tolerance``
+         is used for the feasibility tolerance of
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+     primal_feasibility_tolerance : double (default: 1e-07)
+         Primal feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+         The minimum of this and ``dual_feasibility_tolerance``
+         is used for the feasibility tolerance of
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+     ipm_optimality_tolerance : double (default: ``1e-08``)
+         Optimality tolerance for
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
+         Minimum allowable value is 1e-12.
+     simplex_dual_edge_weight_strategy : str (default: None)
+         Strategy for simplex dual edge weights. The default, ``None``,
+         automatically selects one of the following.
+
+         ``'dantzig'`` uses Dantzig's original strategy of choosing the most
+         negative reduced cost.
+
+         ``'devex'`` uses the strategy described in [15]_.
+
+         ``'steepest'`` uses the exact steepest edge strategy as described in
+         [16]_.
+
+         ``'steepest-devex'`` begins with the exact steepest edge strategy
+         until the computation is too costly or inexact and then switches to
+         the devex method.
+
+         Currently, ``None`` always selects ``'steepest-devex'``, but this
+         may change as new options become available.
+     mip_rel_gap : double (default: None)
+         Termination criterion for the MIP solver: the solver will terminate
+         when the gap between the primal objective value and the dual
+         objective bound, scaled by the primal objective value, is
+         <= mip_rel_gap.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         ``unknown_options`` is non-empty, a warning is issued listing
+         all unused options.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
+
+         x : 1D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration or time limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : The HiGHS solver ran into a problem.
+
+         message : str
+             A string descriptor of the exit status of the algorithm.
+         nit : int
+             The total number of iterations performed.
+             For the HiGHS simplex method, this includes iterations in all
+             phases. For the HiGHS interior-point method, this does not
+             include crossover iterations.
+         crossover_nit : int
+             The number of primal/dual pushes performed during the
+             crossover routine for the HiGHS interior-point method.
+             This is ``0`` for the HiGHS simplex method.
+         ineqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             inequality constraints, `b_ub`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally positive) values of the slack variables,
+                 ``b_ub - A_ub @ x``. This quantity is also commonly
+                 referred to as "slack".
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 inequality constraints, `b_ub`.
+
+         eqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             equality constraints, `b_eq`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally zero) residuals of the equality constraints,
+                 ``b_eq - A_eq @ x``.
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 equality constraints, `b_eq`.
+
+         lower, upper : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             lower and upper bounds on decision variables, `bounds`.
+
+             residual : np.ndarray
+                 The (nominally positive) values of the quantity
+                 ``x - lb`` (lower) or ``ub - x`` (upper).
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the lower and upper
+                 `bounds`.
+
+     Notes
+     -----
+
+     Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
+     of the C++ high performance dual revised simplex implementation (HSOL)
+     [13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>`
+     chooses between the two automatically. For new code involving `linprog`,
+     we recommend explicitly choosing one of these three method values instead
+     of the legacy methods
+     :ref:`'interior-point' <optimize.linprog-interior-point>`,
+     :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+     :ref:`'simplex' <optimize.linprog-simplex>`.
+
+     The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
+     `marginals`, or partial derivatives of the objective function with
+     respect to the right-hand side of each constraint. These partial
+     derivatives are also referred to as "Lagrange multipliers", "dual
+     values", and "shadow prices". The sign convention of `marginals` is
+     opposite that of Lagrange multipliers produced by many nonlinear solvers.
+
+     References
+     ----------
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+     .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
+             Mathematical Programming 5.1 (1973): 1-28.
+     .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
+             simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
+     """
+     pass
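+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # the options documented above are passed to `linprog` through its
+ # `options` dict when `method='highs'` is selected, e.g.
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs',
+ #                   options={'time_limit': 10.0, 'presolve': True,
+ #                            'mip_rel_gap': 1e-4})  # gap only matters with
+ #                                                   # integrality constraints
+ #     res.ineqlin.marginals  # shadow prices of the inequality constraints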
+
+
+ def _linprog_highs_ds_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
+                           bounds=None, method='highs-ds', callback=None,
+                           maxiter=None, disp=False, presolve=True,
+                           time_limit=None,
+                           dual_feasibility_tolerance=None,
+                           primal_feasibility_tolerance=None,
+                           simplex_dual_edge_weight_strategy=None,
+                           **unknown_options):
+     r"""
+     Linear programming: minimize a linear objective function subject to linear
+     equality and inequality constraints using the HiGHS dual simplex solver.
+
+     Linear programming solves problems of the following form:
+
+     .. math::
+
+         \min_x \ & c^T x \\
+         \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
+         & A_{eq} x = b_{eq},\\
+         & l \leq x \leq u ,
+
+     where :math:`x` is a vector of decision variables; :math:`c`,
+     :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
+     :math:`A_{ub}` and :math:`A_{eq}` are matrices.
+
+     Alternatively, that's:
+
+     minimize::
+
+         c @ x
+
+     such that::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     Note that by default ``lb = 0`` and ``ub = None`` unless specified with
+     ``bounds``.
+
+     Parameters
+     ----------
+     c : 1-D array
+         The coefficients of the linear objective function to be minimized.
+     A_ub : 2-D array, optional
+         The inequality constraint matrix. Each row of ``A_ub`` specifies the
+         coefficients of a linear inequality constraint on ``x``.
+     b_ub : 1-D array, optional
+         The inequality constraint vector. Each element represents an
+         upper bound on the corresponding value of ``A_ub @ x``.
+     A_eq : 2-D array, optional
+         The equality constraint matrix. Each row of ``A_eq`` specifies the
+         coefficients of a linear equality constraint on ``x``.
+     b_eq : 1-D array, optional
+         The equality constraint vector. Each element of ``A_eq @ x`` must
+         equal the corresponding element of ``b_eq``.
+     bounds : sequence, optional
+         A sequence of ``(min, max)`` pairs for each element in ``x``, defining
+         the minimum and maximum values of that decision variable. Use ``None``
+         to indicate that there is no bound. By default, bounds are
+         ``(0, None)`` (all decision variables are non-negative).
+         If a single tuple ``(min, max)`` is provided, then ``min`` and
+         ``max`` will serve as bounds for all decision variables.
+     method : str
+
+         This is the method-specific documentation for 'highs-ds'.
+         :ref:`'highs' <optimize.linprog-highs>`,
+         :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, and the legacy
+         methods :ref:`'interior-point' <optimize.linprog-interior-point>`,
+         :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+         :ref:`'simplex' <optimize.linprog-simplex>`
+         are also available.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+         Default is the largest possible value for an ``int`` on the platform.
+     disp : bool (default: ``False``)
+         Set to ``True`` if indicators of optimization status are to be
+         printed to the console during optimization.
+     presolve : bool (default: ``True``)
+         Presolve attempts to identify trivial infeasibilities,
+         identify trivial unboundedness, and simplify the problem before
+         sending it to the main solver. It is generally recommended
+         to keep the default setting ``True``; set to ``False`` if
+         presolve is to be disabled.
+     time_limit : float
+         The maximum time in seconds allotted to solve the problem;
+         default is the largest possible value for a ``double`` on the
+         platform.
+     dual_feasibility_tolerance : double (default: 1e-07)
+         Dual feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+     primal_feasibility_tolerance : double (default: 1e-07)
+         Primal feasibility tolerance for
+         :ref:`'highs-ds' <optimize.linprog-highs-ds>`.
+     simplex_dual_edge_weight_strategy : str (default: None)
+         Strategy for simplex dual edge weights. The default, ``None``,
+         automatically selects one of the following.
+
+         ``'dantzig'`` uses Dantzig's original strategy of choosing the most
+         negative reduced cost.
+
+         ``'devex'`` uses the strategy described in [15]_.
+
+         ``'steepest'`` uses the exact steepest edge strategy as described in
+         [16]_.
+
+         ``'steepest-devex'`` begins with the exact steepest edge strategy
+         until the computation is too costly or inexact and then switches to
+         the devex method.
+
+         Currently, ``None`` always selects ``'steepest-devex'``, but this
+         may change as new options become available.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         ``unknown_options`` is non-empty, a warning is issued listing
+         all unused options.
+
+     Returns
+     -------
+     res : OptimizeResult
+         A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
+
+         x : 1D array
+             The values of the decision variables that minimize the
+             objective function while satisfying the constraints.
+         fun : float
+             The optimal value of the objective function ``c @ x``.
+         slack : 1D array
+             The (nominally positive) values of the slack,
+             ``b_ub - A_ub @ x``.
+         con : 1D array
+             The (nominally zero) residuals of the equality constraints,
+             ``b_eq - A_eq @ x``.
+         success : bool
+             ``True`` when the algorithm succeeds in finding an optimal
+             solution.
+         status : int
+             An integer representing the exit status of the algorithm.
+
+             ``0`` : Optimization terminated successfully.
+
+             ``1`` : Iteration or time limit reached.
+
+             ``2`` : Problem appears to be infeasible.
+
+             ``3`` : Problem appears to be unbounded.
+
+             ``4`` : The HiGHS solver ran into a problem.
+
+         message : str
+             A string descriptor of the exit status of the algorithm.
+         nit : int
+             The total number of iterations performed. This includes
+             iterations in all phases.
+         crossover_nit : int
+             This is always ``0`` for the HiGHS simplex method.
+             For the HiGHS interior-point method, this is the number of
+             primal/dual pushes performed during the crossover routine.
+         ineqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             inequality constraints, `b_ub`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally positive) values of the slack variables,
+                 ``b_ub - A_ub @ x``. This quantity is also commonly
+                 referred to as "slack".
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 inequality constraints, `b_ub`.
+
+         eqlin : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             equality constraints, `b_eq`. A dictionary consisting of the
+             fields:
+
+             residual : np.ndarray
+                 The (nominally zero) residuals of the equality constraints,
+                 ``b_eq - A_eq @ x``.
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the right-hand side of the
+                 equality constraints, `b_eq`.
+
+         lower, upper : OptimizeResult
+             Solution and sensitivity information corresponding to the
+             lower and upper bounds on decision variables, `bounds`.
+
+             residual : np.ndarray
+                 The (nominally positive) values of the quantity
+                 ``x - lb`` (lower) or ``ub - x`` (upper).
+
+             marginals : np.ndarray
+                 The sensitivity (partial derivative) of the objective
+                 function with respect to the lower and upper
+                 `bounds`.
+
+     Notes
+     -----
+
+     Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
+     of the C++ high performance dual revised simplex implementation (HSOL)
+     [13]_, [14]_. Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
+     is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
+     **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
+     as a simplex solver. Method :ref:`'highs' <optimize.linprog-highs>`
+     chooses between the two automatically. For new code involving `linprog`,
+     we recommend explicitly choosing one of these three method values instead
+     of the legacy methods
+     :ref:`'interior-point' <optimize.linprog-interior-point>`,
+     :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
+     :ref:`'simplex' <optimize.linprog-simplex>`.
+
+     The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
+     `marginals`, or partial derivatives of the objective function with
+     respect to the right-hand side of each constraint. These partial
+     derivatives are also referred to as "Lagrange multipliers", "dual
+     values", and "shadow prices". The sign convention of `marginals` is
+     opposite that of Lagrange multipliers produced by many nonlinear solvers.
+
+     References
+     ----------
+     .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
+             "HiGHS - high performance software for linear optimization."
+             https://highs.dev/
+     .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
+             simplex method." Mathematical Programming Computation, 10 (1),
+             119-142, 2018. DOI: 10.1007/s12532-017-0130-5
+     .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
+             Mathematical Programming 5.1 (1973): 1-28.
+     .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
+             simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
+     """
+     pass
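+
+ # Illustrative sketch (editorial addition, not part of the original file):
+ # selecting the dual simplex solver directly, with one of the edge-weight
+ # strategies documented above:
+ #
+ #     res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs-ds',
+ #                   options={'simplex_dual_edge_weight_strategy': 'devex'})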
531
+
532
+
533
+ def _linprog_highs_ipm_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
534
+ bounds=None, method='highs-ipm', callback=None,
535
+ maxiter=None, disp=False, presolve=True,
536
+ time_limit=None,
537
+ dual_feasibility_tolerance=None,
538
+ primal_feasibility_tolerance=None,
539
+ ipm_optimality_tolerance=None,
540
+ **unknown_options):
541
+ r"""
542
+ Linear programming: minimize a linear objective function subject to linear
543
+ equality and inequality constraints using the HiGHS interior point solver.
544
+
545
+ Linear programming solves problems of the following form:
546
+
547
+ .. math::
548
+
549
+ \min_x \ & c^T x \\
550
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
551
+ & A_{eq} x = b_{eq},\\
552
+ & l \leq x \leq u ,
553
+
554
+ where :math:`x` is a vector of decision variables; :math:`c`,
555
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
556
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
557
+
558
+ Alternatively, that's:
559
+
560
+ minimize::
561
+
562
+ c @ x
563
+
564
+ such that::
565
+
566
+ A_ub @ x <= b_ub
567
+ A_eq @ x == b_eq
568
+ lb <= x <= ub
569
+
570
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
571
+ ``bounds``.
572
+
573
+ Parameters
574
+ ----------
575
+ c : 1-D array
576
+ The coefficients of the linear objective function to be minimized.
577
+ A_ub : 2-D array, optional
578
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
579
+ coefficients of a linear inequality constraint on ``x``.
580
+ b_ub : 1-D array, optional
581
+ The inequality constraint vector. Each element represents an
582
+ upper bound on the corresponding value of ``A_ub @ x``.
583
+ A_eq : 2-D array, optional
584
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
585
+ coefficients of a linear equality constraint on ``x``.
586
+ b_eq : 1-D array, optional
587
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
588
+ the corresponding element of ``b_eq``.
589
+ bounds : sequence, optional
590
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
591
+ the minimum and maximum values of that decision variable. Use ``None``
592
+ to indicate that there is no bound. By default, bounds are
593
+ ``(0, None)`` (all decision variables are non-negative).
594
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
595
+ ``max`` will serve as bounds for all decision variables.
596
+ method : str
597
+
598
+ This is the method-specific documentation for 'highs-ipm'.
599
+ :ref:`'highs-ipm' <optimize.linprog-highs>`,
600
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
601
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
602
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
603
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
604
+ are also available.
605
+
606
+ Options
607
+ -------
608
+ maxiter : int
609
+ The maximum number of iterations to perform in either phase.
610
+ For :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`, this does not
611
+ include the number of crossover iterations. Default is the largest
612
+ possible value for an ``int`` on the platform.
613
+ disp : bool (default: ``False``)
614
+ Set to ``True`` if indicators of optimization status are to be
615
+ printed to the console during optimization.
616
+ presolve : bool (default: ``True``)
617
+ Presolve attempts to identify trivial infeasibilities,
618
+ identify trivial unboundedness, and simplify the problem before
619
+ sending it to the main solver. It is generally recommended
620
+ to keep the default setting ``True``; set to ``False`` if
621
+ presolve is to be disabled.
622
+ time_limit : float
623
+ The maximum time in seconds allotted to solve the problem;
624
+ default is the largest possible value for a ``double`` on the
625
+ platform.
626
+ dual_feasibility_tolerance : double (default: 1e-07)
627
+ The minimum of this and ``primal_feasibility_tolerance``
628
+ is used for the feasibility tolerance of
629
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
630
+ primal_feasibility_tolerance : double (default: 1e-07)
631
+ The minimum of this and ``dual_feasibility_tolerance``
632
+ is used for the feasibility tolerance of
633
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
634
+ ipm_optimality_tolerance : double (default: ``1e-08``)
635
+ Optimality tolerance for
636
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`.
637
+ Minimum allowable value is 1e-12.
638
+ unknown_options : dict
639
+ Optional arguments not used by this particular solver. If
640
+ ``unknown_options`` is non-empty, a warning is issued listing
641
+ all unused options.
642
+
643
+ Returns
644
+ -------
645
+ res : OptimizeResult
646
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
647
+
648
+ x : 1D array
649
+ The values of the decision variables that minimizes the
650
+ objective function while satisfying the constraints.
651
+ fun : float
652
+ The optimal value of the objective function ``c @ x``.
653
+ slack : 1D array
654
+ The (nominally positive) values of the slack,
655
+ ``b_ub - A_ub @ x``.
656
+ con : 1D array
657
+ The (nominally zero) residuals of the equality constraints,
658
+ ``b_eq - A_eq @ x``.
659
+ success : bool
660
+ ``True`` when the algorithm succeeds in finding an optimal
661
+ solution.
662
+ status : int
663
+ An integer representing the exit status of the algorithm.
664
+
665
+ ``0`` : Optimization terminated successfully.
666
+
667
+ ``1`` : Iteration or time limit reached.
668
+
669
+ ``2`` : Problem appears to be infeasible.
670
+
671
+ ``3`` : Problem appears to be unbounded.
672
+
673
+ ``4`` : The HiGHS solver ran into a problem.
674
+
675
+ message : str
676
+ A string descriptor of the exit status of the algorithm.
677
+ nit : int
678
+ The total number of iterations performed.
679
+ For the HiGHS interior-point method, this does not include
680
+ crossover iterations.
681
+ crossover_nit : int
682
+ The number of primal/dual pushes performed during the
683
+ crossover routine for the HiGHS interior-point method.
684
+ ineqlin : OptimizeResult
685
+ Solution and sensitivity information corresponding to the
686
+ inequality constraints, `b_ub`. A dictionary consisting of the
687
+ fields:
688
+
689
+ residual : np.ndnarray
690
+ The (nominally positive) values of the slack variables,
691
+ ``b_ub - A_ub @ x``. This quantity is also commonly
692
+ referred to as "slack".
693
+
694
+ marginals : np.ndarray
695
+ The sensitivity (partial derivative) of the objective
696
+ function with respect to the right-hand side of the
697
+ inequality constraints, `b_ub`.
698
+
699
+ eqlin : OptimizeResult
700
+ Solution and sensitivity information corresponding to the
701
+ equality constraints, `b_eq`. A dictionary consisting of the
702
+ fields:
703
+
704
+ residual : np.ndarray
705
+ The (nominally zero) residuals of the equality constraints,
706
+ ``b_eq - A_eq @ x``.
707
+
708
+ marginals : np.ndarray
709
+ The sensitivity (partial derivative) of the objective
710
+ function with respect to the right-hand side of the
711
+ equality constraints, `b_eq`.
712
+
713
+ lower, upper : OptimizeResult
714
+ Solution and sensitivity information corresponding to the
715
+ lower and upper bounds on decision variables, `bounds`.
716
+
717
+ residual : np.ndarray
718
+ The (nominally positive) values of the quantity
719
+ ``x - lb`` (lower) or ``ub - x`` (upper).
720
+
721
+ marginals : np.ndarray
722
+ The sensitivity (partial derivative) of the objective
723
+ function with respect to the lower and upper
724
+ `bounds`.
725
+
726
+ Notes
727
+ -----
728
+
729
+ Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
730
+ is a wrapper of a C++ implementation of an **i**\ nterior-\ **p**\ oint
731
+ **m**\ ethod [13]_; it features a crossover routine, so it is as accurate
732
+ as a simplex solver.
733
+ Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper
734
+ of the C++ high performance dual revised simplex implementation (HSOL)
735
+ [13]_, [14]_. Method :ref:`'highs' <optimize.linprog-highs>` chooses
736
+ between the two automatically. For new code involving `linprog`, we
737
+ recommend explicitly choosing one of these three method values instead of
738
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
739
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
740
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy).
741
+
742
+ The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
743
+ `marginals`, or partial derivatives of the objective function with respect
744
+ to the right-hand side of each constraint. These partial derivatives are
745
+ also referred to as "Lagrange multipliers", "dual values", and
746
+ "shadow prices". The sign convention of `marginals` is opposite that
747
+ of Lagrange multipliers produced by many nonlinear solvers.
748
+
749
+ References
750
+ ----------
751
+ .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
752
+ "HiGHS - high performance software for linear optimization."
753
+ https://highs.dev/
754
+ .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
755
+ simplex method." Mathematical Programming Computation, 10 (1),
756
+ 119-142, 2018. DOI: 10.1007/s12532-017-0130-5
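+
+ Examples
+ --------
+ A minimal sketch of reading the marginals described above; the problem
+ data are illustrative, not from the original documentation.
+
+ >>> import numpy as np
+ >>> from scipy.optimize import linprog
+ >>> c = np.array([-1, 4])
+ >>> A_ub = np.array([[-3, 1], [1, 2]])
+ >>> b_ub = np.array([6, 4])
+ >>> res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='highs')
+ >>> shadow_prices = res.ineqlin.marginals # d(fun)/d(b_ub); sign per Notes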
757
+ """
758
+ pass
759
+
760
+
761
+ def _linprog_ip_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
762
+ bounds=None, method='interior-point', callback=None,
763
+ maxiter=1000, disp=False, presolve=True,
764
+ tol=1e-8, autoscale=False, rr=True,
765
+ alpha0=.99995, beta=0.1, sparse=False,
766
+ lstsq=False, sym_pos=True, cholesky=True, pc=True,
767
+ ip=False, permc_spec='MMD_AT_PLUS_A', **unknown_options):
768
+ r"""
769
+ Linear programming: minimize a linear objective function subject to linear
770
+ equality and inequality constraints using the interior-point method of
771
+ [4]_.
772
+
773
+ .. deprecated:: 1.9.0
774
+ `method='interior-point'` will be removed in SciPy 1.11.0.
775
+ It is replaced by `method='highs'` because the latter is
776
+ faster and more robust.
777
+
778
+ Linear programming solves problems of the following form:
779
+
780
+ .. math::
781
+
782
+ \min_x \ & c^T x \\
783
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
784
+ & A_{eq} x = b_{eq},\\
785
+ & l \leq x \leq u ,
786
+
787
+ where :math:`x` is a vector of decision variables; :math:`c`,
788
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
789
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
790
+
791
+ Alternatively, that's:
792
+
793
+ minimize::
794
+
795
+ c @ x
796
+
797
+ such that::
798
+
799
+ A_ub @ x <= b_ub
800
+ A_eq @ x == b_eq
801
+ lb <= x <= ub
802
+
803
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
804
+ ``bounds``.
805
+
806
+ Parameters
807
+ ----------
808
+ c : 1-D array
809
+ The coefficients of the linear objective function to be minimized.
810
+ A_ub : 2-D array, optional
811
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
812
+ coefficients of a linear inequality constraint on ``x``.
813
+ b_ub : 1-D array, optional
814
+ The inequality constraint vector. Each element represents an
815
+ upper bound on the corresponding value of ``A_ub @ x``.
816
+ A_eq : 2-D array, optional
817
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
818
+ coefficients of a linear equality constraint on ``x``.
819
+ b_eq : 1-D array, optional
820
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
821
+ the corresponding element of ``b_eq``.
822
+ bounds : sequence, optional
823
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
824
+ the minimum and maximum values of that decision variable. Use ``None``
825
+ to indicate that there is no bound. By default, bounds are
826
+ ``(0, None)`` (all decision variables are non-negative).
827
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
828
+ ``max`` will serve as bounds for all decision variables.
829
+ method : str
830
+ This is the method-specific documentation for 'interior-point'.
831
+ :ref:`'highs' <optimize.linprog-highs>`,
832
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
833
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
834
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
835
+ :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
836
+ are also available.
837
+ callback : callable, optional
838
+ Callback function to be executed once per iteration.
839
+
840
+ Options
841
+ -------
842
+ maxiter : int (default: 1000)
843
+ The maximum number of iterations of the algorithm.
844
+ disp : bool (default: False)
845
+ Set to ``True`` if indicators of optimization status are to be printed
846
+ to the console each iteration.
847
+ presolve : bool (default: True)
848
+ Presolve attempts to identify trivial infeasibilities,
849
+ identify trivial unboundedness, and simplify the problem before
850
+ sending it to the main solver. It is generally recommended
851
+ to keep the default setting ``True``; set to ``False`` if
852
+ presolve is to be disabled.
853
+ tol : float (default: 1e-8)
854
+ Termination tolerance to be used for all termination criteria;
855
+ see [4]_ Section 4.5.
856
+ autoscale : bool (default: False)
857
+ Set to ``True`` to automatically perform equilibration.
858
+ Consider using this option if the numerical values in the
859
+ constraints are separated by several orders of magnitude.
860
+ rr : bool (default: True)
861
+ Set to ``False`` to disable automatic redundancy removal.
862
+ alpha0 : float (default: 0.99995)
863
+ The maximal step size for Mehrotra's predictor-corrector search
864
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
865
+ beta : float (default: 0.1)
866
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
867
+ when Mehrotra's predictor-corrector is not in use (uncommon).
868
+ sparse : bool (default: False)
869
+ Set to ``True`` if the problem is to be treated as sparse after
870
+ presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
871
+ this option will automatically be set ``True``, and the problem
872
+ will be treated as sparse even during presolve. If your constraint
873
+ matrices contain mostly zeros and the problem is not very small (less
874
+ than about 100 constraints or variables), consider setting ``True``
875
+ or providing ``A_eq`` and ``A_ub`` as sparse matrices.
876
+ lstsq : bool (default: ``False``)
877
+ Set to ``True`` if the problem is expected to be very poorly
878
+ conditioned. This should always be left ``False`` unless severe
879
+ numerical difficulties are encountered. Leave this at the default
880
+ unless you receive a warning message suggesting otherwise.
881
+ sym_pos : bool (default: True)
882
+ Leave ``True`` if the problem is expected to yield a well conditioned
883
+ symmetric positive definite normal equation matrix
884
+ (almost always). Leave this at the default unless you receive
885
+ a warning message suggesting otherwise.
886
+ cholesky : bool (default: True)
887
+ Set to ``True`` if the normal equations are to be solved by explicit
888
+ Cholesky decomposition followed by explicit forward/backward
889
+ substitution. This is typically faster for problems
890
+ that are numerically well-behaved.
891
+ pc : bool (default: True)
892
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
893
+ used. This is almost always (if not always) beneficial.
894
+ ip : bool (default: False)
895
+ Set to ``True`` if the improved initial point suggestion due to [4]_
896
+ Section 4.3 is desired. Whether this is beneficial or not
897
+ depends on the problem.
898
+ permc_spec : str (default: 'MMD_AT_PLUS_A')
899
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
900
+ True``, and no SuiteSparse.)
901
+ A matrix is factorized in each iteration of the algorithm.
902
+ This option specifies how to permute the columns of the matrix for
903
+ sparsity preservation. Acceptable values are:
904
+
905
+ - ``NATURAL``: natural ordering.
906
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
907
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
908
+ - ``COLAMD``: approximate minimum degree column ordering.
909
+
910
+ This option can impact the convergence of the
911
+ interior point algorithm; test different values to determine which
912
+ performs best for your problem. For more information, refer to
913
+ ``scipy.sparse.linalg.splu``.
914
+ unknown_options : dict
915
+ Optional arguments not used by this particular solver. If
916
+ `unknown_options` is non-empty a warning is issued listing all
917
+ unused options.
918
+
919
+ Returns
920
+ -------
921
+ res : OptimizeResult
922
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
923
+
924
+ x : 1-D array
925
+ The values of the decision variables that minimize the
926
+ objective function while satisfying the constraints.
927
+ fun : float
928
+ The optimal value of the objective function ``c @ x``.
929
+ slack : 1-D array
930
+ The (nominally positive) values of the slack variables,
931
+ ``b_ub - A_ub @ x``.
932
+ con : 1-D array
933
+ The (nominally zero) residuals of the equality constraints,
934
+ ``b_eq - A_eq @ x``.
935
+ success : bool
936
+ ``True`` when the algorithm succeeds in finding an optimal
937
+ solution.
938
+ status : int
939
+ An integer representing the exit status of the algorithm.
940
+
941
+ ``0`` : Optimization terminated successfully.
942
+
943
+ ``1`` : Iteration limit reached.
944
+
945
+ ``2`` : Problem appears to be infeasible.
946
+
947
+ ``3`` : Problem appears to be unbounded.
948
+
949
+ ``4`` : Numerical difficulties encountered.
950
+
951
+ message : str
952
+ A string descriptor of the exit status of the algorithm.
953
+ nit : int
954
+ The total number of iterations performed in all phases.
955
+
956
+
957
+ Notes
958
+ -----
959
+ This method implements the algorithm outlined in [4]_ with ideas from [8]_
960
+ and a structure inspired by the simpler methods of [6]_.
961
+
962
+ The primal-dual path following method begins with initial 'guesses' of
963
+ the primal and dual variables of the standard form problem and iteratively
964
+ attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
965
+ problem with a gradually reduced logarithmic barrier term added to the
966
+ objective. This particular implementation uses a homogeneous self-dual
967
+ formulation, which provides certificates of infeasibility or unboundedness
968
+ where applicable.
969
+
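+ As a sketch (using the notation of [4]_ Equation 8.8, which matches the
+ residuals computed in ``_linprog_ip.py`` later in this diff), each
+ iteration drives the following quantities toward zero:
+
+ .. math::
+
+ r_P = b \tau - A x, \quad
+ r_D = c \tau - A^T y - z, \quad
+ r_G = c^T x - b^T y + \kappa,
+
+ with complementarity measure :math:`\mu = (x^T z + \tau \kappa)/(n + 1)`.
+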
970
+ The default initial point for the primal and dual variables is that
971
+ defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
972
+ point option ``ip=True``), an alternate (potentially improved) starting
973
+ point can be calculated according to the additional recommendations of
974
+ [4]_ Section 4.4.
975
+
976
+ A search direction is calculated using the predictor-corrector method
977
+ (single correction) proposed by Mehrotra and detailed in [4]_ Section 4.1.
978
+ (A potential improvement would be to implement the method of multiple
979
+ corrections described in [4]_ Section 4.2.) In practice, this is
980
+ accomplished by solving the normal equations, [4]_ Section 5.1 Equations
981
+ 8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
982
+ 8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
983
+ solving the normal equations rather than 8.25 directly is that the
984
+ matrices involved are symmetric positive definite, so Cholesky
985
+ decomposition can be used rather than the more expensive LU factorization.
986
+
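+ Concretely, a sketch mirroring ``_get_delta`` and ``_get_solver`` in
+ ``_linprog_ip.py`` (shown later in this diff); ``rhs`` stands in for the
+ assembled right-hand side::
+
+ Dinv = x / z # diagonal of D^{-1} at the current iterate
+ M = A @ (Dinv.reshape(-1, 1) * A.T) # normal-equations matrix (SPD)
+ L = scipy.linalg.cho_factor(M) # Cholesky factors
+ step = scipy.linalg.cho_solve(L, rhs) # cheap solve per right-hand side
+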
987
+ With default options, the solver used to perform the factorization depends
988
+ on third-party software availability and the conditioning of the problem.
989
+
990
+ For dense problems, solvers are tried in the following order:
991
+
992
+ 1. ``scipy.linalg.cho_factor``
993
+
994
+ 2. ``scipy.linalg.solve`` with option ``sym_pos=True``
995
+
996
+ 3. ``scipy.linalg.solve`` with option ``sym_pos=False``
997
+
998
+ 4. ``scipy.linalg.lstsq``
999
+
1000
+ For sparse problems:
1001
+
1002
+ 1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are
1003
+ installed)
1004
+
1005
+ 2. ``scipy.sparse.linalg.factorized`` (if scikit-umfpack and SuiteSparse
1006
+ are installed)
1007
+
1008
+ 3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
1009
+
1010
+ 4. ``scipy.sparse.linalg.lsqr``
1011
+
1012
+ If the solver fails for any reason, successively more robust (but slower)
1013
+ solvers are attempted in the order indicated. Attempting, failing, and
1014
+ re-starting factorization can be time consuming, so if the problem is
1015
+ numerically challenging, options can be set to bypass solvers that are
1016
+ failing. Setting ``cholesky=False`` skips to solver 2,
1017
+ ``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
1018
+ to solver 4 for both sparse and dense problems.
1019
+
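+ A hypothetical sketch of this retry pattern (not verbatim from SciPy;
+ ``_get_solver`` is the private helper shown later in this diff, which
+ returns ``None`` when factorization fails)::
+
+ for opts in (dict(cholesky=True), # solver 1
+ dict(cholesky=False), # solver 2
+ dict(cholesky=False, sym_pos=False), # solver 3
+ dict(cholesky=False, sym_pos=False, lstsq=True)): # solver 4
+ solve = _get_solver(M, sparse=sparse, **opts)
+ if solve is not None: # factorization succeeded; stop falling back
+ break
+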
1020
+ Potential improvements for combatting issues associated with dense
1021
+ columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
1022
+ [10]_ Section 4.1-4.2; the latter also discusses the alleviation of
1023
+ accuracy issues associated with the substitution approach to free
1024
+ variables.
1025
+
1026
+ After calculating the search direction, the maximum possible step size
1027
+ that does not activate the non-negativity constraints is calculated, and
1028
+ the smaller of this step size and unity is applied (as in [4]_ Section
1029
+ 4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
1030
+
1031
+ The new point is tested according to the termination conditions of [4]_
1032
+ Section 4.5. The same tolerance, which can be set using the ``tol`` option,
1033
+ is used for all checks. (A potential improvement would be to expose
1034
+ the different tolerances to be set independently.) If optimality,
1035
+ unboundedness, or infeasibility is detected, the solve procedure
1036
+ terminates; otherwise it repeats.
1037
+
1038
+ Whereas the top-level ``linprog`` module expects a problem of the form:
1039
+
1040
+ Minimize::
1041
+
1042
+ c @ x
1043
+
1044
+ Subject to::
1045
+
1046
+ A_ub @ x <= b_ub
1047
+ A_eq @ x == b_eq
1048
+ lb <= x <= ub
1049
+
1050
+ where ``lb = 0`` and ``ub = None`` unless set in ``bounds``. The problem
1051
+ is automatically converted to the form:
1052
+
1053
+ Minimize::
1054
+
1055
+ c @ x
1056
+
1057
+ Subject to::
1058
+
1059
+ A @ x == b
1060
+ x >= 0
1061
+
1062
+ for solution. That is, the original problem contains equality, upper-bound
1063
+ and variable constraints, whereas the method-specific solver requires
1064
+ equality constraints and variable non-negativity. ``linprog`` converts the
1065
+ original problem to standard form by converting the simple bounds to upper
1066
+ bound constraints, introducing non-negative slack variables for inequality
1067
+ constraints, and expressing unbounded variables as the difference between
1068
+ two non-negative variables. The problem is converted back to the original
1069
+ form before results are reported.
1070
+
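+ For example (illustrative), a single block of inequalities gains a
+ non-negative slack vector ``s``, and a free variable is split into a
+ difference of non-negative parts::
+
+ A_ub @ x <= b_ub -> [A_ub, I] @ [x; s] == b_ub, s >= 0
+ x free -> x = x_pos - x_neg, x_pos >= 0, x_neg >= 0
+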
1071
+ References
1072
+ ----------
1073
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
1074
+ optimizer for linear programming: an implementation of the
1075
+ homogeneous algorithm." High performance optimization. Springer US,
1076
+ 2000. 197-232.
1077
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
1078
+ Programming based on Newton's Method." Unpublished Course Notes,
1079
+ March 2004. Available 2/25/2017 at
1080
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
1081
+ .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
1082
+ programming." Mathematical Programming 71.2 (1995): 221-245.
1083
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1084
+ programming." Athena Scientific 1 (1997): 997.
1085
+ .. [10] Andersen, Erling D., et al. Implementation of interior point
1086
+ methods for large scale linear programming. HEC/Universite de
1087
+ Geneve, 1996.
1088
+ """
1089
+ pass
1090
+
1091
+
1092
+ def _linprog_rs_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
1093
+ bounds=None, method='interior-point', callback=None,
1094
+ x0=None, maxiter=5000, disp=False, presolve=True,
1095
+ tol=1e-12, autoscale=False, rr=True, maxupdate=10,
1096
+ mast=False, pivot="mrc", **unknown_options):
1097
+ r"""
1098
+ Linear programming: minimize a linear objective function subject to linear
1099
+ equality and inequality constraints using the revised simplex method.
1100
+
1101
+ .. deprecated:: 1.9.0
1102
+ `method='revised simplex'` will be removed in SciPy 1.11.0.
1103
+ It is replaced by `method='highs'` because the latter is
1104
+ faster and more robust.
1105
+
1106
+ Linear programming solves problems of the following form:
1107
+
1108
+ .. math::
1109
+
1110
+ \min_x \ & c^T x \\
1111
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
1112
+ & A_{eq} x = b_{eq},\\
1113
+ & l \leq x \leq u ,
1114
+
1115
+ where :math:`x` is a vector of decision variables; :math:`c`,
1116
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
1117
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
1118
+
1119
+ Alternatively, that's:
1120
+
1121
+ minimize::
1122
+
1123
+ c @ x
1124
+
1125
+ such that::
1126
+
1127
+ A_ub @ x <= b_ub
1128
+ A_eq @ x == b_eq
1129
+ lb <= x <= ub
1130
+
1131
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
1132
+ ``bounds``.
1133
+
1134
+ Parameters
1135
+ ----------
1136
+ c : 1-D array
1137
+ The coefficients of the linear objective function to be minimized.
1138
+ A_ub : 2-D array, optional
1139
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
1140
+ coefficients of a linear inequality constraint on ``x``.
1141
+ b_ub : 1-D array, optional
1142
+ The inequality constraint vector. Each element represents an
1143
+ upper bound on the corresponding value of ``A_ub @ x``.
1144
+ A_eq : 2-D array, optional
1145
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
1146
+ coefficients of a linear equality constraint on ``x``.
1147
+ b_eq : 1-D array, optional
1148
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
1149
+ the corresponding element of ``b_eq``.
1150
+ bounds : sequence, optional
1151
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
1152
+ the minimum and maximum values of that decision variable. Use ``None``
1153
+ to indicate that there is no bound. By default, bounds are
1154
+ ``(0, None)`` (all decision variables are non-negative).
1155
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
1156
+ ``max`` will serve as bounds for all decision variables.
1157
+ method : str
1158
+ This is the method-specific documentation for 'revised simplex'.
1159
+ :ref:`'highs' <optimize.linprog-highs>`,
1160
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
1161
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
1162
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
1163
+ and :ref:`'simplex' <optimize.linprog-simplex>` (legacy)
1164
+ are also available.
1165
+ callback : callable, optional
1166
+ Callback function to be executed once per iteration.
1167
+ x0 : 1-D array, optional
1168
+ Guess values of the decision variables, which will be refined by
1169
+ the optimization algorithm. This argument is currently used only by the
1170
+ 'revised simplex' method, and can only be used if `x0` represents a
1171
+ basic feasible solution.
1172
+
1173
+ Options
1174
+ -------
1175
+ maxiter : int (default: 5000)
1176
+ The maximum number of iterations to perform in either phase.
1177
+ disp : bool (default: False)
1178
+ Set to ``True`` if indicators of optimization status are to be printed
1179
+ to the console each iteration.
1180
+ presolve : bool (default: True)
1181
+ Presolve attempts to identify trivial infeasibilities,
1182
+ identify trivial unboundedness, and simplify the problem before
1183
+ sending it to the main solver. It is generally recommended
1184
+ to keep the default setting ``True``; set to ``False`` if
1185
+ presolve is to be disabled.
1186
+ tol : float (default: 1e-12)
1187
+ The tolerance which determines when a solution is "close enough" to
1188
+ zero in Phase 1 to be considered a basic feasible solution or close
1189
+ enough to positive to serve as an optimal solution.
1190
+ autoscale : bool (default: False)
1191
+ Set to ``True`` to automatically perform equilibration.
1192
+ Consider using this option if the numerical values in the
1193
+ constraints are separated by several orders of magnitude.
1194
+ rr : bool (default: True)
1195
+ Set to ``False`` to disable automatic redundancy removal.
1196
+ maxupdate : int (default: 10)
1197
+ The maximum number of updates performed on the LU factorization.
1198
+ After this many updates is reached, the basis matrix is factorized
1199
+ from scratch.
1200
+ mast : bool (default: False)
1201
+ Minimize Amortized Solve Time. If enabled, the average time to solve
1202
+ a linear system using the basis factorization is measured. Typically,
1203
+ the average solve time will decrease with each successive solve after
1204
+ initial factorization, as factorization takes much more time than the
1205
+ solve operation (and updates). Eventually, however, the updated
1206
+ factorization becomes sufficiently complex that the average solve time
1207
+ begins to increase. When this is detected, the basis is refactorized
1208
+ from scratch. Enable this option to maximize speed at the risk of
1209
+ nondeterministic behavior. Ignored if ``maxupdate`` is 0.
1210
+ pivot : "mrc" or "bland" (default: "mrc")
1211
+ Pivot rule: Minimum Reduced Cost ("mrc") or Bland's rule ("bland").
1212
+ Choose Bland's rule if iteration limit is reached and cycling is
1213
+ suspected.
1214
+ unknown_options : dict
1215
+ Optional arguments not used by this particular solver. If
1216
+ `unknown_options` is non-empty a warning is issued listing all
1217
+ unused options.
1218
+
1219
+ Returns
1220
+ -------
1221
+ res : OptimizeResult
1222
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
1223
+
1224
+ x : 1-D array
1225
+ The values of the decision variables that minimize the
1226
+ objective function while satisfying the constraints.
1227
+ fun : float
1228
+ The optimal value of the objective function ``c @ x``.
1229
+ slack : 1-D array
1230
+ The (nominally positive) values of the slack variables,
1231
+ ``b_ub - A_ub @ x``.
1232
+ con : 1-D array
1233
+ The (nominally zero) residuals of the equality constraints,
1234
+ ``b_eq - A_eq @ x``.
1235
+ success : bool
1236
+ ``True`` when the algorithm succeeds in finding an optimal
1237
+ solution.
1238
+ status : int
1239
+ An integer representing the exit status of the algorithm.
1240
+
1241
+ ``0`` : Optimization terminated successfully.
1242
+
1243
+ ``1`` : Iteration limit reached.
1244
+
1245
+ ``2`` : Problem appears to be infeasible.
1246
+
1247
+ ``3`` : Problem appears to be unbounded.
1248
+
1249
+ ``4`` : Numerical difficulties encountered.
1250
+
1251
+ ``5`` : Problem has no constraints; turn presolve on.
1252
+
1253
+ ``6`` : Invalid guess provided.
1254
+
1255
+ message : str
1256
+ A string descriptor of the exit status of the algorithm.
1257
+ nit : int
1258
+ The total number of iterations performed in all phases.
1259
+
1260
+
1261
+ Notes
1262
+ -----
1263
+ Method *revised simplex* uses the revised simplex method as described in
1264
+ [9]_, except that a factorization [11]_ of the basis matrix, rather than
1265
+ its inverse, is efficiently maintained and used to solve the linear systems
1266
+ at each iteration of the algorithm.
1267
+
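+ For example, an illustrative sketch of reusing factors with
+ ``scipy.linalg`` (``B`` and ``b`` denote the basis matrix and a
+ right-hand side; the actual method also updates the factorization
+ rather than refactorizing for every basis change)::
+
+ from scipy.linalg import lu_factor, lu_solve
+ lu_piv = lu_factor(B) # factorize the basis matrix once
+ x_B = lu_solve(lu_piv, b) # reuse the factors for each solve
+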
1268
+ References
1269
+ ----------
1270
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1271
+ programming." Athena Scientific 1 (1997): 997.
1272
+ .. [11] Bartels, Richard H. "A stabilization of the simplex method."
1273
+ Numerische Mathematik 16.5 (1971): 414-434.
1274
+ """
1275
+ pass
1276
+
1277
+
1278
+ def _linprog_simplex_doc(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
1279
+ bounds=None, method='interior-point', callback=None,
1280
+ maxiter=5000, disp=False, presolve=True,
1281
+ tol=1e-12, autoscale=False, rr=True, bland=False,
1282
+ **unknown_options):
1283
+ r"""
1284
+ Linear programming: minimize a linear objective function subject to linear
1285
+ equality and inequality constraints using the tableau-based simplex method.
1286
+
1287
+ .. deprecated:: 1.9.0
1288
+ `method='simplex'` will be removed in SciPy 1.11.0.
1289
+ It is replaced by `method='highs'` because the latter is
1290
+ faster and more robust.
1291
+
1292
+ Linear programming solves problems of the following form:
1293
+
1294
+ .. math::
1295
+
1296
+ \min_x \ & c^T x \\
1297
+ \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
1298
+ & A_{eq} x = b_{eq},\\
1299
+ & l \leq x \leq u ,
1300
+
1301
+ where :math:`x` is a vector of decision variables; :math:`c`,
1302
+ :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
1303
+ :math:`A_{ub}` and :math:`A_{eq}` are matrices.
1304
+
1305
+ Alternatively, that's:
1306
+
1307
+ minimize::
1308
+
1309
+ c @ x
1310
+
1311
+ such that::
1312
+
1313
+ A_ub @ x <= b_ub
1314
+ A_eq @ x == b_eq
1315
+ lb <= x <= ub
1316
+
1317
+ Note that by default ``lb = 0`` and ``ub = None`` unless specified with
1318
+ ``bounds``.
1319
+
1320
+ Parameters
1321
+ ----------
1322
+ c : 1-D array
1323
+ The coefficients of the linear objective function to be minimized.
1324
+ A_ub : 2-D array, optional
1325
+ The inequality constraint matrix. Each row of ``A_ub`` specifies the
1326
+ coefficients of a linear inequality constraint on ``x``.
1327
+ b_ub : 1-D array, optional
1328
+ The inequality constraint vector. Each element represents an
1329
+ upper bound on the corresponding value of ``A_ub @ x``.
1330
+ A_eq : 2-D array, optional
1331
+ The equality constraint matrix. Each row of ``A_eq`` specifies the
1332
+ coefficients of a linear equality constraint on ``x``.
1333
+ b_eq : 1-D array, optional
1334
+ The equality constraint vector. Each element of ``A_eq @ x`` must equal
1335
+ the corresponding element of ``b_eq``.
1336
+ bounds : sequence, optional
1337
+ A sequence of ``(min, max)`` pairs for each element in ``x``, defining
1338
+ the minimum and maximum values of that decision variable. Use ``None``
1339
+ to indicate that there is no bound. By default, bounds are
1340
+ ``(0, None)`` (all decision variables are non-negative).
1341
+ If a single tuple ``(min, max)`` is provided, then ``min`` and
1342
+ ``max`` will serve as bounds for all decision variables.
1343
+ method : str
1344
+ This is the method-specific documentation for 'simplex'.
1345
+ :ref:`'highs' <optimize.linprog-highs>`,
1346
+ :ref:`'highs-ds' <optimize.linprog-highs-ds>`,
1347
+ :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`,
1348
+ :ref:`'interior-point' <optimize.linprog-interior-point>` (default),
1349
+ and :ref:`'revised simplex' <optimize.linprog-revised_simplex>`
1350
+ are also available.
1351
+ callback : callable, optional
1352
+ Callback function to be executed once per iteration.
1353
+
1354
+ Options
1355
+ -------
1356
+ maxiter : int (default: 5000)
1357
+ The maximum number of iterations to perform in either phase.
1358
+ disp : bool (default: False)
1359
+ Set to ``True`` if indicators of optimization status are to be printed
1360
+ to the console each iteration.
1361
+ presolve : bool (default: True)
1362
+ Presolve attempts to identify trivial infeasibilities,
1363
+ identify trivial unboundedness, and simplify the problem before
1364
+ sending it to the main solver. It is generally recommended
1365
+ to keep the default setting ``True``; set to ``False`` if
1366
+ presolve is to be disabled.
1367
+ tol : float (default: 1e-12)
1368
+ The tolerance which determines when a solution is "close enough" to
1369
+ zero in Phase 1 to be considered a basic feasible solution or close
1370
+ enough to positive to serve as an optimal solution.
1371
+ autoscale : bool (default: False)
1372
+ Set to ``True`` to automatically perform equilibration.
1373
+ Consider using this option if the numerical values in the
1374
+ constraints are separated by several orders of magnitude.
1375
+ rr : bool (default: True)
1376
+ Set to ``False`` to disable automatic redundancy removal.
1377
+ bland : bool
1378
+ If True, use Bland's anti-cycling rule [3]_ to choose pivots to
1379
+ prevent cycling. If False, choose pivots which should lead to a
1380
+ converged solution more quickly. The latter method is subject to
1381
+ cycling (non-convergence) in rare instances.
1382
+ unknown_options : dict
1383
+ Optional arguments not used by this particular solver. If
1384
+ `unknown_options` is non-empty a warning is issued listing all
1385
+ unused options.
1386
+
1387
+ Returns
1388
+ -------
1389
+ res : OptimizeResult
1390
+ A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
1391
+
1392
+ x : 1-D array
1393
+ The values of the decision variables that minimize the
1394
+ objective function while satisfying the constraints.
1395
+ fun : float
1396
+ The optimal value of the objective function ``c @ x``.
1397
+ slack : 1-D array
1398
+ The (nominally positive) values of the slack variables,
1399
+ ``b_ub - A_ub @ x``.
1400
+ con : 1-D array
1401
+ The (nominally zero) residuals of the equality constraints,
1402
+ ``b_eq - A_eq @ x``.
1403
+ success : bool
1404
+ ``True`` when the algorithm succeeds in finding an optimal
1405
+ solution.
1406
+ status : int
1407
+ An integer representing the exit status of the algorithm.
1408
+
1409
+ ``0`` : Optimization terminated successfully.
1410
+
1411
+ ``1`` : Iteration limit reached.
1412
+
1413
+ ``2`` : Problem appears to be infeasible.
1414
+
1415
+ ``3`` : Problem appears to be unbounded.
1416
+
1417
+ ``4`` : Numerical difficulties encountered.
1418
+
1419
+ message : str
1420
+ A string descriptor of the exit status of the algorithm.
1421
+ nit : int
1422
+ The total number of iterations performed in all phases.
1423
+
1424
+ References
1425
+ ----------
1426
+ .. [1] Dantzig, George B., Linear programming and extensions. Rand
1427
+ Corporation Research Study Princeton Univ. Press, Princeton, NJ,
1428
+ 1963
1429
+ .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
1430
+ Mathematical Programming", McGraw-Hill, Chapter 4.
1431
+ .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
1432
+ Mathematics of Operations Research (2), 1977: pp. 103-107.
1433
+ """
1434
+ pass
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_highs.py ADDED
@@ -0,0 +1,440 @@
1
+ """HiGHS Linear Optimization Methods
2
+
3
+ Interface to HiGHS linear optimization software.
4
+ https://highs.dev/
5
+
6
+ .. versionadded:: 1.5.0
7
+
8
+ References
9
+ ----------
10
+ .. [1] Q. Huangfu and J.A.J. Hall. "Parallelizing the dual revised simplex
11
+ method." Mathematical Programming Computation, 10 (1), 119-142,
12
+ 2018. DOI: 10.1007/s12532-017-0130-5
13
+
14
+ """
15
+
16
+ import inspect
17
+ import numpy as np
18
+ from ._optimize import OptimizeWarning, OptimizeResult
19
+ from warnings import warn
20
+ from ._highs._highs_wrapper import _highs_wrapper
21
+ from ._highs._highs_constants import (
22
+ CONST_INF,
23
+ MESSAGE_LEVEL_NONE,
24
+ HIGHS_OBJECTIVE_SENSE_MINIMIZE,
25
+
26
+ MODEL_STATUS_NOTSET,
27
+ MODEL_STATUS_LOAD_ERROR,
28
+ MODEL_STATUS_MODEL_ERROR,
29
+ MODEL_STATUS_PRESOLVE_ERROR,
30
+ MODEL_STATUS_SOLVE_ERROR,
31
+ MODEL_STATUS_POSTSOLVE_ERROR,
32
+ MODEL_STATUS_MODEL_EMPTY,
33
+ MODEL_STATUS_OPTIMAL,
34
+ MODEL_STATUS_INFEASIBLE,
35
+ MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE,
36
+ MODEL_STATUS_UNBOUNDED,
37
+ MODEL_STATUS_REACHED_DUAL_OBJECTIVE_VALUE_UPPER_BOUND
38
+ as MODEL_STATUS_RDOVUB,
39
+ MODEL_STATUS_REACHED_OBJECTIVE_TARGET,
40
+ MODEL_STATUS_REACHED_TIME_LIMIT,
41
+ MODEL_STATUS_REACHED_ITERATION_LIMIT,
42
+
43
+ HIGHS_SIMPLEX_STRATEGY_DUAL,
44
+
45
+ HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
46
+
47
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
48
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
49
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
50
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
51
+ )
52
+ from scipy.sparse import csc_matrix, vstack, issparse
53
+
54
+
55
+ def _highs_to_scipy_status_message(highs_status, highs_message):
56
+ """Converts HiGHS status number/message to SciPy status number/message"""
57
+
58
+ scipy_statuses_messages = {
59
+ None: (4, "HiGHS did not provide a status code. "),
60
+ MODEL_STATUS_NOTSET: (4, ""),
61
+ MODEL_STATUS_LOAD_ERROR: (4, ""),
62
+ MODEL_STATUS_MODEL_ERROR: (2, ""),
63
+ MODEL_STATUS_PRESOLVE_ERROR: (4, ""),
64
+ MODEL_STATUS_SOLVE_ERROR: (4, ""),
65
+ MODEL_STATUS_POSTSOLVE_ERROR: (4, ""),
66
+ MODEL_STATUS_MODEL_EMPTY: (4, ""),
67
+ MODEL_STATUS_RDOVUB: (4, ""),
68
+ MODEL_STATUS_REACHED_OBJECTIVE_TARGET: (4, ""),
69
+ MODEL_STATUS_OPTIMAL: (0, "Optimization terminated successfully. "),
70
+ MODEL_STATUS_REACHED_TIME_LIMIT: (1, "Time limit reached. "),
71
+ MODEL_STATUS_REACHED_ITERATION_LIMIT: (1, "Iteration limit reached. "),
72
+ MODEL_STATUS_INFEASIBLE: (2, "The problem is infeasible. "),
73
+ MODEL_STATUS_UNBOUNDED: (3, "The problem is unbounded. "),
74
+ MODEL_STATUS_UNBOUNDED_OR_INFEASIBLE: (4, "The problem is unbounded "
75
+ "or infeasible. ")}
76
+ unrecognized = (4, "The HiGHS status code was not recognized. ")
77
+ scipy_status, scipy_message = (
78
+ scipy_statuses_messages.get(highs_status, unrecognized))
79
+ scipy_message = (f"{scipy_message}"
80
+ f"(HiGHS Status {highs_status}: {highs_message})")
81
+ return scipy_status, scipy_message
82
+
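+ # Illustrative example: an absent status maps to SciPy status 4, e.g.
+ # _highs_to_scipy_status_message(None, "no message") returns
+ # (4, 'HiGHS did not provide a status code. (HiGHS Status None: no message)')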
83
+
84
+ def _replace_inf(x):
85
+ # Replace `np.inf` with CONST_INF
86
+ infs = np.isinf(x)
87
+ with np.errstate(invalid="ignore"):
88
+ x[infs] = np.sign(x[infs])*CONST_INF
89
+ return x
90
+
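+ # Illustrative example: _replace_inf(np.array([1., np.inf, -np.inf]))
+ # returns [1., CONST_INF, -CONST_INF] (modifying its argument in place),
+ # since HiGHS expects its large finite constant rather than np.inf.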
91
+
92
+ def _convert_to_highs_enum(option, option_str, choices):
93
+ # If option is in the choices we can look it up, if not use
94
+ # the default value taken from function signature and warn:
95
+ try:
96
+ return choices[option.lower()]
97
+ except AttributeError:
98
+ return choices[option]
99
+ except KeyError:
100
+ sig = inspect.signature(_linprog_highs)
101
+ default_str = sig.parameters[option_str].default
102
+ warn(f"Option {option_str} is {option}, but only values in "
103
+ f"{set(choices.keys())} are allowed. Using default: "
104
+ f"{default_str}.",
105
+ OptimizeWarning, stacklevel=3)
106
+ return choices[default_str]
107
+
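+ # Illustrative behavior: _convert_to_highs_enum('Devex', ..., choices)
+ # lower-cases the key and returns choices['devex']; a non-string option
+ # such as None falls through the AttributeError branch to choices[option];
+ # an unknown key warns and falls back to the default taken from the
+ # signature of _linprog_highs.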
108
+
109
+ def _linprog_highs(lp, solver, time_limit=None, presolve=True,
110
+ disp=False, maxiter=None,
111
+ dual_feasibility_tolerance=None,
112
+ primal_feasibility_tolerance=None,
113
+ ipm_optimality_tolerance=None,
114
+ simplex_dual_edge_weight_strategy=None,
115
+ mip_rel_gap=None,
116
+ mip_max_nodes=None,
117
+ **unknown_options):
118
+ r"""
119
+ Solve the following linear programming problem using one of the HiGHS
120
+ solvers:
121
+
122
+ User-facing documentation is in _linprog_doc.py.
123
+
124
+ Parameters
125
+ ----------
126
+ lp : _LPProblem
127
+ A ``scipy.optimize._linprog_util._LPProblem`` ``namedtuple``.
128
+ solver : "ipm" or "simplex" or None
129
+ Which HiGHS solver to use. If ``None``, "simplex" will be used.
130
+
131
+ Options
132
+ -------
133
+ maxiter : int
134
+ The maximum number of iterations to perform in either phase. For
135
+ ``solver='ipm'``, this does not include the number of crossover
136
+ iterations. Default is the largest possible value for an ``int``
137
+ on the platform.
138
+ disp : bool
139
+ Set to ``True`` if indicators of optimization status are to be printed
140
+ to the console each iteration; default ``False``.
141
+ time_limit : float
142
+ The maximum time in seconds allotted to solve the problem; default is
143
+ the largest possible value for a ``double`` on the platform.
144
+ presolve : bool
145
+ Presolve attempts to identify trivial infeasibilities,
146
+ identify trivial unboundedness, and simplify the problem before
147
+ sending it to the main solver. It is generally recommended
148
+ to keep the default setting ``True``; set to ``False`` if presolve is
149
+ to be disabled.
150
+ dual_feasibility_tolerance : double
151
+ Dual feasibility tolerance. Default is 1e-07.
152
+ The minimum of this and ``primal_feasibility_tolerance``
153
+ is used for the feasibility tolerance when ``solver='ipm'``.
154
+ primal_feasibility_tolerance : double
155
+ Primal feasibility tolerance. Default is 1e-07.
156
+ The minimum of this and ``dual_feasibility_tolerance``
157
+ is used for the feasibility tolerance when ``solver='ipm'``.
158
+ ipm_optimality_tolerance : double
159
+ Optimality tolerance for ``solver='ipm'``. Default is 1e-08.
160
+ Minimum possible value is 1e-12 and must be smaller than the largest
161
+ possible value for a ``double`` on the platform.
162
+ simplex_dual_edge_weight_strategy : str (default: None)
163
+ Strategy for simplex dual edge weights. The default, ``None``,
164
+ automatically selects one of the following.
165
+
166
+ ``'dantzig'`` uses Dantzig's original strategy of choosing the most
167
+ negative reduced cost.
168
+
169
+ ``'devex'`` uses the strategy described in [15]_.
170
+
171
+ ``steepest`` uses the exact steepest edge strategy as described in
172
+ [16]_.
173
+
174
+ ``'steepest-devex'`` begins with the exact steepest edge strategy
175
+ until the computation is too costly or inexact and then switches to
176
+ the devex method.
177
+
178
+ Currently, using ``None`` always selects ``'steepest-devex'``, but this
179
+ may change as new options become available.
180
+
181
+ mip_max_nodes : int
182
+ The maximum number of nodes allotted to solve the problem; default is
183
+ the largest possible value for a ``HighsInt`` on the platform.
184
+ Ignored if not using the MIP solver.
185
+ unknown_options : dict
186
+ Optional arguments not used by this particular solver. If
187
+ ``unknown_options`` is non-empty, a warning is issued listing all
188
+ unused options.
189
+
190
+ Returns
191
+ -------
192
+ sol : dict
193
+ A dictionary consisting of the fields:
194
+
195
+ x : 1D array
196
+ The values of the decision variables that minimizes the
197
+ objective function while satisfying the constraints.
198
+ fun : float
199
+ The optimal value of the objective function ``c @ x``.
200
+ slack : 1D array
201
+ The (nominally positive) values of the slack,
202
+ ``b_ub - A_ub @ x``.
203
+ con : 1D array
204
+ The (nominally zero) residuals of the equality constraints,
205
+ ``b_eq - A_eq @ x``.
206
+ success : bool
207
+ ``True`` when the algorithm succeeds in finding an optimal
208
+ solution.
209
+ status : int
210
+ An integer representing the exit status of the algorithm.
211
+
212
+ ``0`` : Optimization terminated successfully.
213
+
214
+ ``1`` : Iteration or time limit reached.
215
+
216
+ ``2`` : Problem appears to be infeasible.
217
+
218
+ ``3`` : Problem appears to be unbounded.
219
+
220
+ ``4`` : The HiGHS solver ran into a problem.
221
+
222
+ message : str
223
+ A string descriptor of the exit status of the algorithm.
224
+ nit : int
225
+ The total number of iterations performed.
226
+ For ``solver='simplex'``, this includes iterations in all
227
+ phases. For ``solver='ipm'``, this does not include
228
+ crossover iterations.
229
+ crossover_nit : int
230
+ The number of primal/dual pushes performed during the
231
+ crossover routine for ``solver='ipm'``. This is ``0``
232
+ for ``solver='simplex'``.
233
+ ineqlin : OptimizeResult
234
+ Solution and sensitivity information corresponding to the
235
+ inequality constraints, `b_ub`. A dictionary consisting of the
236
+ fields:
237
+
238
+ residual : np.ndnarray
239
+ The (nominally positive) values of the slack variables,
240
+ ``b_ub - A_ub @ x``. This quantity is also commonly
241
+ referred to as "slack".
242
+
243
+ marginals : np.ndarray
244
+ The sensitivity (partial derivative) of the objective
245
+ function with respect to the right-hand side of the
246
+ inequality constraints, `b_ub`.
247
+
248
+ eqlin : OptimizeResult
249
+ Solution and sensitivity information corresponding to the
250
+ equality constraints, `b_eq`. A dictionary consisting of the
251
+ fields:
252
+
253
+ residual : np.ndarray
254
+ The (nominally zero) residuals of the equality constraints,
255
+ ``b_eq - A_eq @ x``.
256
+
257
+ marginals : np.ndarray
258
+ The sensitivity (partial derivative) of the objective
259
+ function with respect to the right-hand side of the
260
+ equality constraints, `b_eq`.
261
+
262
+ lower, upper : OptimizeResult
263
+ Solution and sensitivity information corresponding to the
264
+ lower and upper bounds on decision variables, `bounds`.
265
+
266
+ residual : np.ndarray
267
+ The (nominally positive) values of the quantity
268
+ ``x - lb`` (lower) or ``ub - x`` (upper).
269
+
270
+ marginals : np.ndarray
271
+ The sensitivity (partial derivative) of the objective
272
+ function with respect to the lower and upper
273
+ `bounds`.
274
+
275
+ mip_node_count : int
276
+ The number of subproblems or "nodes" solved by the MILP
277
+ solver. Only present when `integrality` is not `None`.
278
+
279
+ mip_dual_bound : float
280
+ The MILP solver's final estimate of the lower bound on the
281
+ optimal solution. Only present when `integrality` is not
282
+ `None`.
283
+
284
+ mip_gap : float
285
+ The difference between the final objective function value
286
+ and the final dual bound, scaled by the final objective
287
+ function value. Only present when `integrality` is not
288
+ `None`.
289
+
290
+ Notes
291
+ -----
292
+ The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
293
+ `marginals`, or partial derivatives of the objective function with respect
294
+ to the right-hand side of each constraint. These partial derivatives are
295
+ also referred to as "Lagrange multipliers", "dual values", and
296
+ "shadow prices". The sign convention of `marginals` is opposite that
297
+ of Lagrange multipliers produced by many nonlinear solvers.
298
+
299
+ References
300
+ ----------
301
+ .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
302
+ Mathematical programming 5.1 (1973): 1-28.
303
+ .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
304
+ simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
305
+ """
306
+ if unknown_options:
307
+ message = (f"Unrecognized options detected: {unknown_options}. "
308
+ "These will be passed to HiGHS verbatim.")
309
+ warn(message, OptimizeWarning, stacklevel=3)
310
+
311
+ # Map options to HiGHS enum values
312
+ simplex_dual_edge_weight_strategy_enum = _convert_to_highs_enum(
313
+ simplex_dual_edge_weight_strategy,
314
+ 'simplex_dual_edge_weight_strategy',
315
+ choices={'dantzig': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG,
316
+ 'devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX,
317
+ 'steepest-devex': HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE,
318
+ 'steepest':
319
+ HIGHS_SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE,
320
+ None: None})
321
+
322
+ c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality = lp
323
+
324
+ lb, ub = bounds.T.copy() # separate bounds, copy->C-cntgs
325
+ # highs_wrapper solves LHS <= A*x <= RHS, not equality constraints
326
+ with np.errstate(invalid="ignore"):
327
+ lhs_ub = -np.ones_like(b_ub)*np.inf # LHS of UB constraints is -inf
328
+ rhs_ub = b_ub # RHS of UB constraints is b_ub
329
+ lhs_eq = b_eq # Equality constraint is inequality
330
+ rhs_eq = b_eq # constraint with LHS=RHS
331
+ lhs = np.concatenate((lhs_ub, lhs_eq))
332
+ rhs = np.concatenate((rhs_ub, rhs_eq))
333
+
334
+ if issparse(A_ub) or issparse(A_eq):
335
+ A = vstack((A_ub, A_eq))
336
+ else:
337
+ A = np.vstack((A_ub, A_eq))
338
+ A = csc_matrix(A)
339
+
340
+ options = {
341
+ 'presolve': presolve,
342
+ 'sense': HIGHS_OBJECTIVE_SENSE_MINIMIZE,
343
+ 'solver': solver,
344
+ 'time_limit': time_limit,
345
+ 'highs_debug_level': MESSAGE_LEVEL_NONE,
346
+ 'dual_feasibility_tolerance': dual_feasibility_tolerance,
347
+ 'ipm_optimality_tolerance': ipm_optimality_tolerance,
348
+ 'log_to_console': disp,
349
+ 'mip_max_nodes': mip_max_nodes,
350
+ 'output_flag': disp,
351
+ 'primal_feasibility_tolerance': primal_feasibility_tolerance,
352
+ 'simplex_dual_edge_weight_strategy':
353
+ simplex_dual_edge_weight_strategy_enum,
354
+ 'simplex_strategy': HIGHS_SIMPLEX_STRATEGY_DUAL,
355
+ 'simplex_crash_strategy': HIGHS_SIMPLEX_CRASH_STRATEGY_OFF,
356
+ 'ipm_iteration_limit': maxiter,
357
+ 'simplex_iteration_limit': maxiter,
358
+ 'mip_rel_gap': mip_rel_gap,
359
+ }
360
+ options.update(unknown_options)
361
+
362
+ # np.inf doesn't work; use very large constant
363
+ rhs = _replace_inf(rhs)
364
+ lhs = _replace_inf(lhs)
365
+ lb = _replace_inf(lb)
366
+ ub = _replace_inf(ub)
367
+
368
+ if integrality is None or np.sum(integrality) == 0:
369
+ integrality = np.empty(0)
370
+ else:
371
+ integrality = np.array(integrality)
372
+
373
+ res = _highs_wrapper(c, A.indptr, A.indices, A.data, lhs, rhs,
374
+ lb, ub, integrality.astype(np.uint8), options)
375
+
376
+ # HiGHS represents constraints as lhs/rhs, so
377
+ # Ax + s = b => Ax = b - s
378
+ # and we need to split up s by A_ub and A_eq
379
+ if 'slack' in res:
380
+ slack = res['slack']
381
+ con = np.array(slack[len(b_ub):])
382
+ slack = np.array(slack[:len(b_ub)])
383
+ else:
384
+ slack, con = None, None
385
+
386
+ # lagrange multipliers for equalities/inequalities and upper/lower bounds
387
+ if 'lambda' in res:
388
+ lamda = res['lambda']
389
+ marg_ineqlin = np.array(lamda[:len(b_ub)])
390
+ marg_eqlin = np.array(lamda[len(b_ub):])
391
+ marg_upper = np.array(res['marg_bnds'][1, :])
392
+ marg_lower = np.array(res['marg_bnds'][0, :])
393
+ else:
394
+ marg_ineqlin, marg_eqlin = None, None
395
+ marg_upper, marg_lower = None, None
396
+
397
+ # this needs to be updated if we start choosing the solver intelligently
398
+
399
+ # Convert to scipy-style status and message
400
+ highs_status = res.get('status', None)
401
+ highs_message = res.get('message', None)
402
+ status, message = _highs_to_scipy_status_message(highs_status,
403
+ highs_message)
404
+
405
+ x = np.array(res['x']) if 'x' in res else None
406
+ sol = {'x': x,
407
+ 'slack': slack,
408
+ 'con': con,
409
+ 'ineqlin': OptimizeResult({
410
+ 'residual': slack,
411
+ 'marginals': marg_ineqlin,
412
+ }),
413
+ 'eqlin': OptimizeResult({
414
+ 'residual': con,
415
+ 'marginals': marg_eqlin,
416
+ }),
417
+ 'lower': OptimizeResult({
418
+ 'residual': None if x is None else x - lb,
419
+ 'marginals': marg_lower,
420
+ }),
421
+ 'upper': OptimizeResult({
422
+ 'residual': None if x is None else ub - x,
423
+ 'marginals': marg_upper
424
+ }),
425
+ 'fun': res.get('fun'),
426
+ 'status': status,
427
+ 'success': res['status'] == MODEL_STATUS_OPTIMAL,
428
+ 'message': message,
429
+ 'nit': res.get('simplex_nit', 0) or res.get('ipm_nit', 0),
430
+ 'crossover_nit': res.get('crossover_nit'),
431
+ }
432
+
433
+ if np.any(x) and integrality is not None:
434
+ sol.update({
435
+ 'mip_node_count': res.get('mip_node_count', 0),
436
+ 'mip_dual_bound': res.get('mip_dual_bound', 0.0),
437
+ 'mip_gap': res.get('mip_gap', 0.0),
438
+ })
439
+
440
+ return sol
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_ip.py ADDED
@@ -0,0 +1,1126 @@
1
+ """Interior-point method for linear programming
2
+
3
+ The *interior-point* method uses the primal-dual path following algorithm
4
+ outlined in [1]_. This algorithm supports sparse constraint matrices and
5
+ is typically faster than the simplex methods, especially for large, sparse
6
+ problems. Note, however, that the solution returned may be slightly less
7
+ accurate than those of the simplex methods and will not, in general,
8
+ correspond with a vertex of the polytope defined by the constraints.
9
+
10
+ .. versionadded:: 1.0.0
11
+
12
+ References
13
+ ----------
14
+ .. [1] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
15
+ optimizer for linear programming: an implementation of the
16
+ homogeneous algorithm." High performance optimization. Springer US,
17
+ 2000. 197-232.
18
+ """
19
+ # Author: Matt Haberland
20
+
21
+ import numpy as np
22
+ import scipy as sp
23
+ import scipy.sparse as sps
24
+ from warnings import warn
25
+ from scipy.linalg import LinAlgError
26
+ from ._optimize import OptimizeWarning, OptimizeResult, _check_unknown_options
27
+ from ._linprog_util import _postsolve
28
+ has_umfpack = True
29
+ has_cholmod = True
30
+ try:
31
+ import sksparse # noqa: F401
32
+ from sksparse.cholmod import cholesky as cholmod # noqa: F401
33
+ from sksparse.cholmod import analyze as cholmod_analyze
34
+ except ImportError:
35
+ has_cholmod = False
36
+ try:
37
+ import scikits.umfpack # test whether to use factorized # noqa: F401
38
+ except ImportError:
39
+ has_umfpack = False
40
+
41
+
42
+ def _get_solver(M, sparse=False, lstsq=False, sym_pos=True,
43
+ cholesky=True, permc_spec='MMD_AT_PLUS_A'):
44
+ """
45
+ Given solver options, return a handle to the appropriate linear system
46
+ solver.
47
+
48
+ Parameters
49
+ ----------
50
+ M : 2-D array
51
+ As defined in [4] Equation 8.31
52
+ sparse : bool (default = False)
53
+ True if the system to be solved is sparse. This is typically set
54
+ True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
55
+ lstsq : bool (default = False)
56
+ True if the system is ill-conditioned and/or (nearly) singular and
57
+ thus a more robust least-squares solver is desired. This is sometimes
58
+ needed as the solution is approached.
59
+ sym_pos : bool (default = True)
60
+ True if the system matrix is symmetric positive definite.
61
+ Sometimes this needs to be set false as the solution is approached,
62
+ even when the system should be symmetric positive definite, due to
63
+ numerical difficulties.
64
+ cholesky : bool (default = True)
65
+ True if the system is to be solved by Cholesky, rather than LU,
66
+ decomposition. This is typically faster unless the problem is very
67
+ small or prone to numerical difficulties.
68
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
69
+ Sparsity preservation strategy used by SuperLU. Acceptable values are:
70
+
71
+ - ``NATURAL``: natural ordering.
72
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
73
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
74
+ - ``COLAMD``: approximate minimum degree column ordering.
75
+
76
+ See SuperLU documentation.
77
+
78
+ Returns
79
+ -------
80
+ solve : function
81
+ Handle to the appropriate solver function
82
+
83
+ """
84
+ try:
85
+ if sparse:
86
+ if lstsq:
87
+ def solve(r, sym_pos=False):
88
+ return sps.linalg.lsqr(M, r)[0]
89
+ elif cholesky:
90
+ try:
91
+ # Will raise an exception in the first call,
92
+ # or when the matrix changes due to a new problem
93
+ _get_solver.cholmod_factor.cholesky_inplace(M)
94
+ except Exception:
95
+ _get_solver.cholmod_factor = cholmod_analyze(M)
96
+ _get_solver.cholmod_factor.cholesky_inplace(M)
97
+ solve = _get_solver.cholmod_factor
98
+ else:
99
+ if has_umfpack and sym_pos:
100
+ solve = sps.linalg.factorized(M)
101
+ else: # factorized doesn't pass permc_spec
102
+ solve = sps.linalg.splu(M, permc_spec=permc_spec).solve
103
+
104
+ else:
105
+ if lstsq: # sometimes necessary as solution is approached
106
+ def solve(r):
107
+ return sp.linalg.lstsq(M, r)[0]
108
+ elif cholesky:
109
+ L = sp.linalg.cho_factor(M)
110
+
111
+ def solve(r):
112
+ return sp.linalg.cho_solve(L, r)
113
+ else:
114
+ # this seems to cache the matrix factorization, so solving
115
+ # with multiple right hand sides is much faster
116
+ def solve(r, sym_pos=sym_pos):
117
+ if sym_pos:
118
+ return sp.linalg.solve(M, r, assume_a="pos")
119
+ else:
120
+ return sp.linalg.solve(M, r)
121
+ # There are many things that can go wrong here, and it's hard to say
122
+ # what all of them are. It doesn't really matter: if the matrix can't be
123
+ # factorized, return None. get_solver will be called again with different
124
+ # inputs, and a new routine will try to factorize the matrix.
125
+ except KeyboardInterrupt:
126
+ raise
127
+ except Exception:
128
+ return None
129
+ return solve
130
+
131
+
132
+ def _get_delta(A, b, c, x, y, z, tau, kappa, gamma, eta, sparse=False,
133
+ lstsq=False, sym_pos=True, cholesky=True, pc=True, ip=False,
134
+ permc_spec='MMD_AT_PLUS_A'):
135
+ """
136
+ Given standard form problem defined by ``A``, ``b``, and ``c``;
137
+ current variable estimates ``x``, ``y``, ``z``, ``tau``, and ``kappa``;
138
+ algorithmic parameters ``gamma`` and ``eta``;
139
+ and options ``sparse``, ``lstsq``, ``sym_pos``, ``cholesky``, ``pc``
140
+ (predictor-corrector), and ``ip`` (initial point improvement),
141
+ get the search direction for increments to the variable estimates.
142
+
143
+ Parameters
144
+ ----------
145
+ As defined in [4], except:
146
+ sparse : bool
147
+ True if the system to be solved is sparse. This is typically set
148
+ True when the original ``A_ub`` and ``A_eq`` arrays are sparse.
149
+ lstsq : bool
150
+ True if the system is ill-conditioned and/or (nearly) singular and
151
+ thus a more robust least-squares solver is desired. This is sometimes
152
+ needed as the solution is approached.
153
+ sym_pos : bool
154
+ True if the system matrix is symmetric positive definite
155
+ Sometimes this needs to be set false as the solution is approached,
156
+ even when the system should be symmetric positive definite, due to
157
+ numerical difficulties.
158
+ cholesky : bool
159
+ True if the system is to be solved by Cholesky, rather than LU,
160
+ decomposition. This is typically faster unless the problem is very
161
+ small or prone to numerical difficulties.
162
+ pc : bool
163
+ True if the predictor-corrector method of Mehrotra is to be used. This
164
+ is almost always (if not always) beneficial. Even though it requires
165
+ the solution of an additional linear system, the factorization
166
+ is typically (implicitly) reused so solution is efficient, and the
167
+ number of algorithm iterations is typically reduced.
168
+ ip : bool
169
+ True if the improved initial point suggestion due to [4] section 4.3
170
+ is desired. It's unclear whether this is beneficial.
171
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
172
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
173
+ True``.) A matrix is factorized in each iteration of the algorithm.
174
+ This option specifies how to permute the columns of the matrix for
175
+ sparsity preservation. Acceptable values are:
176
+
177
+ - ``NATURAL``: natural ordering.
178
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
179
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
180
+ - ``COLAMD``: approximate minimum degree column ordering.
181
+
182
+ This option can impact the convergence of the
183
+ interior point algorithm; test different values to determine which
184
+ performs best for your problem. For more information, refer to
185
+ ``scipy.sparse.linalg.splu``.
186
+
187
+ Returns
188
+ -------
189
+ Search directions as defined in [4]
190
+
191
+ References
192
+ ----------
193
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
194
+ optimizer for linear programming: an implementation of the
195
+ homogeneous algorithm." High performance optimization. Springer US,
196
+ 2000. 197-232.
197
+
198
+ """
199
+ if A.shape[0] == 0:
200
+ # If there are no constraints, some solvers fail (understandably)
201
+ # rather than returning empty solution. This gets the job done.
202
+ sparse, lstsq, sym_pos, cholesky = False, False, True, False
203
+ n_x = len(x)
204
+
205
+ # [4] Equation 8.8
206
+ r_P = b * tau - A.dot(x)
207
+ r_D = c * tau - A.T.dot(y) - z
208
+ r_G = c.dot(x) - b.transpose().dot(y) + kappa
209
+ mu = (x.dot(z) + tau * kappa) / (n_x + 1)
210
+
211
+ # Assemble M from [4] Equation 8.31
212
+ Dinv = x / z
213
+
214
+ if sparse:
215
+ M = A.dot(sps.diags(Dinv, 0, format="csc").dot(A.T))
216
+ else:
217
+ M = A.dot(Dinv.reshape(-1, 1) * A.T)
218
+ solve = _get_solver(M, sparse, lstsq, sym_pos, cholesky, permc_spec)
219
+
220
+ # pc: "predictor-corrector" [4] Section 4.1
221
+ # In development this option could be turned off
222
+ # but it always seems to improve performance substantially
223
+ n_corrections = 1 if pc else 0
224
+
225
+ i = 0
226
+ alpha, d_x, d_z, d_tau, d_kappa = 0, 0, 0, 0, 0
227
+ while i <= n_corrections:
228
+ # Reference [4] Eq. 8.6
229
+ rhatp = eta(gamma) * r_P
230
+ rhatd = eta(gamma) * r_D
231
+ rhatg = eta(gamma) * r_G
232
+
233
+ # Reference [4] Eq. 8.7
234
+ rhatxs = gamma * mu - x * z
235
+ rhattk = gamma * mu - tau * kappa
236
+
237
+ if i == 1:
238
+ if ip: # if the correction is to get "initial point"
239
+ # Reference [4] Eq. 8.23
240
+ rhatxs = ((1 - alpha) * gamma * mu -
241
+ x * z - alpha**2 * d_x * d_z)
242
+ rhattk = ((1 - alpha) * gamma * mu -
243
+ tau * kappa -
244
+ alpha**2 * d_tau * d_kappa)
245
+ else: # if the correction is for "predictor-corrector"
246
+ # Reference [4] Eq. 8.13
247
+ rhatxs -= d_x * d_z
248
+ rhattk -= d_tau * d_kappa
249
+
250
+ # sometimes numerical difficulties arise as the solution is approached
251
+ # this loop tries to solve the equations using a sequence of functions
252
+ # for solve. For dense systems, the order is:
253
+ # 1. scipy.linalg.cho_factor/scipy.linalg.cho_solve,
254
+ # 2. scipy.linalg.solve w/ sym_pos = True,
255
+ # 3. scipy.linalg.solve w/ sym_pos = False, and if all else fails
256
+ # 4. scipy.linalg.lstsq
257
+ # For sparse systems, the order is:
258
+ # 1. sksparse.cholmod.cholesky (if available)
259
+ # 2. scipy.sparse.linalg.factorized (if umfpack available)
260
+ # 3. scipy.sparse.linalg.splu
261
+ # 4. scipy.sparse.linalg.lsqr
262
+ solved = False
263
+ while not solved:
264
+ try:
265
+ # [4] Equation 8.28
266
+ p, q = _sym_solve(Dinv, A, c, b, solve)
267
+ # [4] Equation 8.29
268
+ u, v = _sym_solve(Dinv, A, rhatd -
269
+ (1 / x) * rhatxs, rhatp, solve)
270
+ if np.any(np.isnan(p)) or np.any(np.isnan(q)):
271
+ raise LinAlgError
272
+ solved = True
273
+ except (LinAlgError, ValueError, TypeError) as e:
274
+ # Usually this doesn't happen. If it does, it happens when
275
+ # there are redundant constraints or when approaching the
276
+ # solution. If so, change solver.
277
+ if cholesky:
278
+ cholesky = False
279
+ warn(
280
+ "Solving system with option 'cholesky':True "
281
+ "failed. It is normal for this to happen "
282
+ "occasionally, especially as the solution is "
283
+ "approached. However, if you see this frequently, "
284
+ "consider setting option 'cholesky' to False.",
285
+ OptimizeWarning, stacklevel=5)
286
+ elif sym_pos:
287
+ sym_pos = False
288
+ warn(
289
+ "Solving system with option 'sym_pos':True "
290
+ "failed. It is normal for this to happen "
291
+ "occasionally, especially as the solution is "
292
+ "approached. However, if you see this frequently, "
293
+ "consider setting option 'sym_pos' to False.",
294
+ OptimizeWarning, stacklevel=5)
295
+ elif not lstsq:
296
+ lstsq = True
297
+ warn(
298
+ "Solving system with option 'sym_pos':False "
299
+ "failed. This may happen occasionally, "
300
+ "especially as the solution is "
301
+ "approached. However, if you see this frequently, "
302
+ "your problem may be numerically challenging. "
303
+ "If you cannot improve the formulation, consider "
304
+ "setting 'lstsq' to True. Consider also setting "
305
+ "`presolve` to True, if it is not already.",
306
+ OptimizeWarning, stacklevel=5)
307
+ else:
308
+ raise e
309
+ solve = _get_solver(M, sparse, lstsq, sym_pos,
310
+ cholesky, permc_spec)
311
+ # [4] Results after 8.29
312
+ d_tau = ((rhatg + 1 / tau * rhattk - (-c.dot(u) + b.dot(v))) /
313
+ (1 / tau * kappa + (-c.dot(p) + b.dot(q))))
314
+ d_x = u + p * d_tau
315
+ d_y = v + q * d_tau
316
+
317
+ # [4] Relations between after 8.25 and 8.26
318
+ d_z = (1 / x) * (rhatxs - z * d_x)
319
+ d_kappa = 1 / tau * (rhattk - kappa * d_tau)
320
+
321
+ # [4] 8.12 and "Let alpha be the maximal possible step..." before 8.23
322
+ alpha = _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, 1)
323
+ if ip: # initial point - see [4] 4.4
324
+ gamma = 10
325
+ else: # predictor-corrector, [4] definition after 8.12
326
+ beta1 = 0.1 # [4] pg. 220 (Table 8.1)
327
+ gamma = (1 - alpha)**2 * min(beta1, (1 - alpha))
328
+ i += 1
329
+
330
+ return d_x, d_y, d_z, d_tau, d_kappa
331
+
332
+
333
+ def _sym_solve(Dinv, A, r1, r2, solve):
334
+ """
335
+ An implementation of [4] equation 8.31 and 8.32
336
+
337
+ References
338
+ ----------
339
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
340
+ optimizer for linear programming: an implementation of the
341
+ homogeneous algorithm." High performance optimization. Springer US,
342
+ 2000. 197-232.
343
+
344
+ """
345
+ # [4] 8.31
346
+ r = r2 + A.dot(Dinv * r1)
347
+ v = solve(r)
348
+ # [4] 8.32
349
+ u = Dinv * (A.T.dot(v) - r1)
350
+ return u, v
351
+
352
+
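# --- Editor's sketch (not part of the SciPy source): a hedged numerical
# check that the reduction above solves the symmetric block system
#   [-inv(D)  A.T] [u]   [r1]        with D = diag(Dinv) = diag(x / z) and
#   [   A      0 ] [v] = [r2]        M = A @ D @ A.T  (Eqs. 8.31 / 8.32).
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 5))
Dinv = rng.uniform(0.5, 2.0, 5)
r1, r2 = rng.standard_normal(5), rng.standard_normal(3)

M = A @ (Dinv.reshape(-1, 1) * A.T)
v = np.linalg.solve(M, r2 + A @ (Dinv * r1))   # mirrors _sym_solve
u = Dinv * (A.T @ v - r1)

assert np.allclose(A @ u, r2)                  # second block row
assert np.allclose(A.T @ v - u / Dinv, r1)     # first block row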
353
+ def _get_step(x, d_x, z, d_z, tau, d_tau, kappa, d_kappa, alpha0):
354
+ """
355
+ An implementation of [4] equation 8.21
356
+
357
+ References
358
+ ----------
359
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
360
+ optimizer for linear programming: an implementation of the
361
+ homogeneous algorithm." High performance optimization. Springer US,
362
+ 2000. 197-232.
363
+
364
+ """
365
+ # [4] 4.3 Equation 8.21, ignoring 8.20 requirement
366
+ # same step is taken in primal and dual spaces
367
+ # alpha0 is basically beta3 from [4] Table 8.1, but instead of beta3
368
+ # the value 1 is used in Mehrota corrector and initial point correction
369
+ i_x = d_x < 0
370
+ i_z = d_z < 0
371
+ alpha_x = alpha0 * np.min(x[i_x] / -d_x[i_x]) if np.any(i_x) else 1
372
+ alpha_tau = alpha0 * tau / -d_tau if d_tau < 0 else 1
373
+ alpha_z = alpha0 * np.min(z[i_z] / -d_z[i_z]) if np.any(i_z) else 1
374
+ alpha_kappa = alpha0 * kappa / -d_kappa if d_kappa < 0 else 1
375
+ alpha = np.min([1, alpha_x, alpha_tau, alpha_z, alpha_kappa])
376
+ return alpha
377
+
378
+
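# --- Editor's sketch (not part of the SciPy source): the ratio test above
# in isolation. alpha is the largest fraction alpha0 of the step that keeps
# every updated variable nonnegative (here only x and tau shrink).
import numpy as np

x, d_x = np.array([1.0, 2.0]), np.array([-0.5, 1.0])
tau, d_tau = 1.0, -0.25
alpha0 = 0.99995

i_x = d_x < 0
alpha_x = alpha0 * np.min(x[i_x] / -d_x[i_x]) if np.any(i_x) else 1
alpha_tau = alpha0 * tau / -d_tau if d_tau < 0 else 1
alpha = min(1, alpha_x, alpha_tau)             # -> 1 for this data

assert np.all(x + alpha * d_x >= 0) and tau + alpha * d_tau >= 0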
379
+ def _get_message(status):
380
+ """
381
+ Given problem status code, return a more detailed message.
382
+
383
+ Parameters
384
+ ----------
385
+ status : int
386
+ An integer representing the exit status of the optimization::
387
+
388
+ 0 : Optimization terminated successfully
389
+ 1 : Iteration limit reached
390
+ 2 : Problem appears to be infeasible
391
+ 3 : Problem appears to be unbounded
392
+ 4 : Serious numerical difficulties encountered
393
+
394
+ Returns
395
+ -------
396
+ message : str
397
+ A string descriptor of the exit status of the optimization.
398
+
399
+ """
400
+ messages = (
401
+ ["Optimization terminated successfully.",
402
+ "The iteration limit was reached before the algorithm converged.",
403
+ "The algorithm terminated successfully and determined that the "
404
+ "problem is infeasible.",
405
+ "The algorithm terminated successfully and determined that the "
406
+ "problem is unbounded.",
407
+ "Numerical difficulties were encountered before the problem "
408
+ "converged. Please check your problem formulation for errors, "
409
+ "independence of linear equality constraints, and reasonable "
410
+ "scaling and matrix condition numbers. If you continue to "
411
+ "encounter this error, please submit a bug report."
412
+ ])
413
+ return messages[status]
414
+
415
+
416
+ def _do_step(x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha):
417
+ """
418
+ An implementation of [4] Equation 8.9
419
+
420
+ References
421
+ ----------
422
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
423
+ optimizer for linear programming: an implementation of the
424
+ homogeneous algorithm." High performance optimization. Springer US,
425
+ 2000. 197-232.
426
+
427
+ """
428
+ x = x + alpha * d_x
429
+ tau = tau + alpha * d_tau
430
+ z = z + alpha * d_z
431
+ kappa = kappa + alpha * d_kappa
432
+ y = y + alpha * d_y
433
+ return x, y, z, tau, kappa
434
+
435
+
436
+ def _get_blind_start(shape):
437
+ """
438
+ Return the starting point from [4] 4.4
439
+
440
+ References
441
+ ----------
442
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
443
+ optimizer for linear programming: an implementation of the
444
+ homogeneous algorithm." High performance optimization. Springer US,
445
+ 2000. 197-232.
446
+
447
+ """
448
+ m, n = shape
449
+ x0 = np.ones(n)
450
+ y0 = np.zeros(m)
451
+ z0 = np.ones(n)
452
+ tau0 = 1
453
+ kappa0 = 1
454
+ return x0, y0, z0, tau0, kappa0
455
+
456
+
457
+ def _indicators(A, b, c, c0, x, y, z, tau, kappa):
458
+ """
459
+ Implementation of several equations from [4] used as indicators of
460
+ the status of optimization.
461
+
462
+ References
463
+ ----------
464
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
465
+ optimizer for linear programming: an implementation of the
466
+ homogeneous algorithm." High performance optimization. Springer US,
467
+ 2000. 197-232.
468
+
469
+ """
470
+
471
+ # residuals for termination are relative to initial values
472
+ x0, y0, z0, tau0, kappa0 = _get_blind_start(A.shape)
473
+
474
+ # See [4], Section 4 - The Homogeneous Algorithm, Equation 8.8
475
+ def r_p(x, tau):
476
+ return b * tau - A.dot(x)
477
+
478
+ def r_d(y, z, tau):
479
+ return c * tau - A.T.dot(y) - z
480
+
481
+ def r_g(x, y, kappa):
482
+ return kappa + c.dot(x) - b.dot(y)
483
+
484
+ # np.dot unpacks if they are arrays of size one
485
+ def mu(x, tau, z, kappa):
486
+ return (x.dot(z) + np.dot(tau, kappa)) / (len(x) + 1)
487
+
488
+ obj = c.dot(x / tau) + c0
489
+
490
+ def norm(a):
491
+ return np.linalg.norm(a)
492
+
493
+ # See [4], Section 4.5 - The Stopping Criteria
494
+ r_p0 = r_p(x0, tau0)
495
+ r_d0 = r_d(y0, z0, tau0)
496
+ r_g0 = r_g(x0, y0, kappa0)
497
+ mu_0 = mu(x0, tau0, z0, kappa0)
498
+ rho_A = norm(c.T.dot(x) - b.T.dot(y)) / (tau + norm(b.T.dot(y)))
499
+ rho_p = norm(r_p(x, tau)) / max(1, norm(r_p0))
500
+ rho_d = norm(r_d(y, z, tau)) / max(1, norm(r_d0))
501
+ rho_g = norm(r_g(x, y, kappa)) / max(1, norm(r_g0))
502
+ rho_mu = mu(x, tau, z, kappa) / mu_0
503
+ return rho_p, rho_d, rho_A, rho_g, rho_mu, obj
504
+
505
+
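# --- Editor's sketch (not part of the SciPy source): the stopping
# indicators above, evaluated at the blind start for a tiny problem.
# Each rho is the current residual norm divided by max(1, its initial
# norm), so the indicators begin at O(1) and head toward tol.
import numpy as np

A, b, c = np.array([[1., 1.]]), np.array([1.]), np.array([1., 2.])
x0, y0, z0, tau0, kappa0 = np.ones(2), np.zeros(1), np.ones(2), 1.0, 1.0

r_p0 = b * tau0 - A @ x0
mu_0 = (x0 @ z0 + tau0 * kappa0) / (len(x0) + 1)
rho_p = np.linalg.norm(r_p0) / max(1, np.linalg.norm(r_p0))
print(rho_p, mu_0)   # 1.0 1.0 at this starting point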
506
+ def _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj, header=False):
507
+ """
508
+ Print indicators of optimization status to the console.
509
+
510
+ Parameters
511
+ ----------
512
+ rho_p : float
513
+ The (normalized) primal feasibility, see [4] 4.5
514
+ rho_d : float
515
+ The (normalized) dual feasibility, see [4] 4.5
516
+ rho_g : float
517
+ The (normalized) duality gap, see [4] 4.5
518
+ alpha : float
519
+ The step size, see [4] 4.3
520
+ rho_mu : float
521
+ The (normalized) path parameter, see [4] 4.5
522
+ obj : float
523
+ The objective function value of the current iterate
524
+ header : bool
525
+ True if a header is to be printed
526
+
527
+ References
528
+ ----------
529
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
530
+ optimizer for linear programming: an implementation of the
531
+ homogeneous algorithm." High performance optimization. Springer US,
532
+ 2000. 197-232.
533
+
534
+ """
535
+ if header:
536
+ print("Primal Feasibility ",
537
+ "Dual Feasibility ",
538
+ "Duality Gap ",
539
+ "Step ",
540
+ "Path Parameter ",
541
+ "Objective ")
542
+
543
+ # left-justify each value in a fixed-width column, 13 significant digits
544
+ fmt = '{0:<20.13}{1:<20.13}{2:<20.13}{3:<17.13}{4:<20.13}{5:<20.13}'
545
+ print(fmt.format(
546
+ float(rho_p),
547
+ float(rho_d),
548
+ float(rho_g),
549
+ alpha if isinstance(alpha, str) else float(alpha),
550
+ float(rho_mu),
551
+ float(obj)))
552
+
553
+
554
+ def _ip_hsd(A, b, c, c0, alpha0, beta, maxiter, disp, tol, sparse, lstsq,
555
+ sym_pos, cholesky, pc, ip, permc_spec, callback, postsolve_args):
556
+ r"""
557
+ Solve a linear programming problem in standard form:
558
+
559
+ Minimize::
560
+
561
+ c @ x
562
+
563
+ Subject to::
564
+
565
+ A @ x == b
566
+ x >= 0
567
+
568
+ using the interior point method of [4].
569
+
570
+ Parameters
571
+ ----------
572
+ A : 2-D array
573
+ 2-D array such that ``A @ x``, gives the values of the equality
574
+ constraints at ``x``.
575
+ b : 1-D array
576
+ 1-D array of values representing the RHS of each equality constraint
577
+ (row) in ``A`` (for standard form problem).
578
+ c : 1-D array
579
+ Coefficients of the linear objective function to be minimized (for
580
+ standard form problem).
581
+ c0 : float
582
+ Constant term in objective function due to fixed (and eliminated)
583
+ variables. (Purely for display.)
584
+ alpha0 : float
585
+ The maximal step size for Mehrotra's predictor-corrector search
586
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
587
+ beta : float
588
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
589
+ maxiter : int
590
+ The maximum number of iterations of the algorithm.
591
+ disp : bool
592
+ Set to ``True`` if indicators of optimization status are to be printed
593
+ to the console each iteration.
594
+ tol : float
595
+ Termination tolerance; see [4]_ Section 4.5.
596
+ sparse : bool
597
+ Set to ``True`` if the problem is to be treated as sparse. However,
598
+ the inputs ``A_eq`` and ``A_ub`` should nonetheless be provided as
599
+ (dense) arrays rather than sparse matrices.
600
+ lstsq : bool
601
+ Set to ``True`` if the problem is expected to be very poorly
602
+ conditioned. This should always be left as ``False`` unless severe
603
+ numerical difficulties are frequently encountered, and a better option
604
+ would be to improve the formulation of the problem.
605
+ sym_pos : bool
606
+ Leave ``True`` if the problem is expected to yield a well conditioned
607
+ symmetric positive definite normal equation matrix (almost always).
608
+ cholesky : bool
609
+ Set to ``True`` if the normal equations are to be solved by explicit
610
+ Cholesky decomposition followed by explicit forward/backward
611
+ substitution. This is typically faster for moderate, dense problems
612
+ that are numerically well-behaved.
613
+ pc : bool
614
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
615
+ used. This is almost always (if not always) beneficial.
616
+ ip : bool
617
+ Set to ``True`` if the improved initial point suggestion due to [4]_
618
+ Section 4.3 is desired. It's unclear whether this is beneficial.
619
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
620
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
621
+ True``.) A matrix is factorized in each iteration of the algorithm.
622
+ This option specifies how to permute the columns of the matrix for
623
+ sparsity preservation. Acceptable values are:
624
+
625
+ - ``NATURAL``: natural ordering.
626
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
627
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
628
+ - ``COLAMD``: approximate minimum degree column ordering.
629
+
630
+ This option can impact the convergence of the
631
+ interior point algorithm; test different values to determine which
632
+ performs best for your problem. For more information, refer to
633
+ ``scipy.sparse.linalg.splu``.
634
+ callback : callable, optional
635
+ If a callback function is provided, it will be called within each
636
+ iteration of the algorithm. The callback function must accept a single
637
+ `scipy.optimize.OptimizeResult` consisting of the following fields:
638
+
639
+ x : 1-D array
640
+ Current solution vector
641
+ fun : float
642
+ Current value of the objective function
643
+ success : bool
644
+ True only when an algorithm has completed successfully,
645
+ so this is always False as the callback function is called
646
+ only while the algorithm is still iterating.
647
+ slack : 1-D array
648
+ The values of the slack variables. Each slack variable
649
+ corresponds to an inequality constraint. If the slack is zero,
650
+ the corresponding constraint is active.
651
+ con : 1-D array
652
+ The (nominally zero) residuals of the equality constraints,
653
+ that is, ``b - A_eq @ x``
654
+ phase : int
655
+ The phase of the algorithm being executed. This is always
656
+ 1 for the interior-point method because it has only one phase.
657
+ status : int
658
+ For the interior-point method, this is always 0 because if a
659
+ different status is detected, the algorithm terminates.
660
+ nit : int
661
+ The number of iterations performed.
662
+ message : str
663
+ A string descriptor of the exit status of the optimization.
664
+ postsolve_args : tuple
665
+ Data needed by _postsolve to convert the solution to the standard-form
666
+ problem into the solution to the original problem.
667
+
668
+ Returns
669
+ -------
670
+ x_hat : float
671
+ Solution vector (for standard form problem).
672
+ status : int
673
+ An integer representing the exit status of the optimization::
674
+
675
+ 0 : Optimization terminated successfully
676
+ 1 : Iteration limit reached
677
+ 2 : Problem appears to be infeasible
678
+ 3 : Problem appears to be unbounded
679
+ 4 : Serious numerical difficulties encountered
680
+
681
+ message : str
682
+ A string descriptor of the exit status of the optimization.
683
+ iteration : int
684
+ The number of iterations taken to solve the problem
685
+
686
+ References
687
+ ----------
688
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
689
+ optimizer for linear programming: an implementation of the
690
+ homogeneous algorithm." High performance optimization. Springer US,
691
+ 2000. 197-232.
692
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
693
+ Programming based on Newton's Method." Unpublished Course Notes,
694
+ March 2004. Available 2/25/2017 at:
695
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
696
+
697
+ """
698
+
699
+ iteration = 0
700
+
701
+ # default initial point
702
+ x, y, z, tau, kappa = _get_blind_start(A.shape)
703
+
704
+ # first iteration is special improvement of initial point
705
+ ip = ip if pc else False
706
+
707
+ # [4] 4.5
708
+ rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
709
+ A, b, c, c0, x, y, z, tau, kappa)
710
+ go = rho_p > tol or rho_d > tol or rho_A > tol # we might get lucky : )
711
+
712
+ if disp:
713
+ _display_iter(rho_p, rho_d, rho_g, "-", rho_mu, obj, header=True)
714
+ if callback is not None:
715
+ x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
716
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
717
+ 'con': con, 'nit': iteration, 'phase': 1,
718
+ 'complete': False, 'status': 0,
719
+ 'message': "", 'success': False})
720
+ callback(res)
721
+
722
+ status = 0
723
+ message = "Optimization terminated successfully."
724
+
725
+ if sparse:
726
+ A = sps.csc_matrix(A)
727
+
728
+ while go:
729
+
730
+ iteration += 1
731
+
732
+ if ip: # initial point
733
+ # [4] Section 4.4
734
+ gamma = 1
735
+
736
+ def eta(g):
737
+ return 1
738
+ else:
739
+ # gamma = 0 in predictor step according to [4] 4.1
740
+ # if predictor/corrector is off, use mean of complementarity [6]
741
+ # 5.1 / [4] Below Figure 10-4
742
+ gamma = 0 if pc else beta * np.mean(z * x)
743
+ # [4] Section 4.1
744
+
745
+ def eta(g=gamma):
746
+ return 1 - g
747
+
748
+ try:
749
+ # Solve [4] 8.6 and 8.7/8.13/8.23
750
+ d_x, d_y, d_z, d_tau, d_kappa = _get_delta(
751
+ A, b, c, x, y, z, tau, kappa, gamma, eta,
752
+ sparse, lstsq, sym_pos, cholesky, pc, ip, permc_spec)
753
+
754
+ if ip: # initial point
755
+ # [4] 4.4
756
+ # Formula after 8.23 takes a full step regardless of whether
757
+ # it makes components negative
758
+ alpha = 1.0
759
+ x, y, z, tau, kappa = _do_step(
760
+ x, y, z, tau, kappa, d_x, d_y,
761
+ d_z, d_tau, d_kappa, alpha)
762
+ x[x < 1] = 1
763
+ z[z < 1] = 1
764
+ tau = max(1, tau)
765
+ kappa = max(1, kappa)
766
+ ip = False # done with initial point
767
+ else:
768
+ # [4] Section 4.3
769
+ alpha = _get_step(x, d_x, z, d_z, tau,
770
+ d_tau, kappa, d_kappa, alpha0)
771
+ # [4] Equation 8.9
772
+ x, y, z, tau, kappa = _do_step(
773
+ x, y, z, tau, kappa, d_x, d_y, d_z, d_tau, d_kappa, alpha)
774
+
775
+ except (LinAlgError, FloatingPointError,
776
+ ValueError, ZeroDivisionError):
777
+ # this can happen when sparse solver is used and presolve
778
+ # is turned off. Also observed ValueError in AppVeyor Python 3.6
779
+ # Win32 build (PR #8676). I've never seen it otherwise.
780
+ status = 4
781
+ message = _get_message(status)
782
+ break
783
+
784
+ # [4] 4.5
785
+ rho_p, rho_d, rho_A, rho_g, rho_mu, obj = _indicators(
786
+ A, b, c, c0, x, y, z, tau, kappa)
787
+ go = rho_p > tol or rho_d > tol or rho_A > tol
788
+
789
+ if disp:
790
+ _display_iter(rho_p, rho_d, rho_g, alpha, rho_mu, obj)
791
+ if callback is not None:
792
+ x_o, fun, slack, con = _postsolve(x/tau, postsolve_args)
793
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
794
+ 'con': con, 'nit': iteration, 'phase': 1,
795
+ 'complete': False, 'status': 0,
796
+ 'message': "", 'success': False})
797
+ callback(res)
798
+
799
+ # [4] 4.5
800
+ inf1 = (rho_p < tol and rho_d < tol and rho_g < tol and tau < tol *
801
+ max(1, kappa))
802
+ inf2 = rho_mu < tol and tau < tol * min(1, kappa)
803
+ if inf1 or inf2:
804
+ # [4] Lemma 8.4 / Theorem 8.3
805
+ if b.transpose().dot(y) > tol:
806
+ status = 2
807
+ else: # elif c.T.dot(x) < tol: ? Probably not necessary.
808
+ status = 3
809
+ message = _get_message(status)
810
+ break
811
+ elif iteration >= maxiter:
812
+ status = 1
813
+ message = _get_message(status)
814
+ break
815
+
816
+ x_hat = x / tau
817
+ # [4] Statement after Theorem 8.2
818
+ return x_hat, status, message, iteration
819
+
820
+
821
+ def _linprog_ip(c, c0, A, b, callback, postsolve_args, maxiter=1000, tol=1e-8,
822
+ disp=False, alpha0=.99995, beta=0.1, sparse=False, lstsq=False,
823
+ sym_pos=True, cholesky=None, pc=True, ip=False,
824
+ permc_spec='MMD_AT_PLUS_A', **unknown_options):
825
+ r"""
826
+ Minimize a linear objective function subject to linear
827
+ equality and non-negativity constraints using the interior point method
828
+ of [4]_. Linear programming is intended to solve problems
829
+ of the following form:
830
+
831
+ Minimize::
832
+
833
+ c @ x
834
+
835
+ Subject to::
836
+
837
+ A @ x == b
838
+ x >= 0
839
+
840
+ User-facing documentation is in _linprog_doc.py.
841
+
842
+ Parameters
843
+ ----------
844
+ c : 1-D array
845
+ Coefficients of the linear objective function to be minimized.
846
+ c0 : float
847
+ Constant term in objective function due to fixed (and eliminated)
848
+ variables. (Purely for display.)
849
+ A : 2-D array
850
+ 2-D array such that ``A @ x``, gives the values of the equality
851
+ constraints at ``x``.
852
+ b : 1-D array
853
+ 1-D array of values representing the right hand side of each equality
854
+ constraint (row) in ``A``.
855
+ callback : callable, optional
856
+ Callback function to be executed once per iteration.
857
+ postsolve_args : tuple
858
+ Data needed by _postsolve to convert the solution to the standard-form
859
+ problem into the solution to the original problem.
860
+
861
+ Options
862
+ -------
863
+ maxiter : int (default = 1000)
864
+ The maximum number of iterations of the algorithm.
865
+ tol : float (default = 1e-8)
866
+ Termination tolerance to be used for all termination criteria;
867
+ see [4]_ Section 4.5.
868
+ disp : bool (default = False)
869
+ Set to ``True`` if indicators of optimization status are to be printed
870
+ to the console each iteration.
871
+ alpha0 : float (default = 0.99995)
872
+ The maximal step size for Mehrotra's predictor-corrector search
873
+ direction; see :math:`\beta_{3}` of [4]_ Table 8.1.
874
+ beta : float (default = 0.1)
875
+ The desired reduction of the path parameter :math:`\mu` (see [6]_)
876
+ when Mehrota's predictor-corrector is not in use (uncommon).
877
+ sparse : bool (default = False)
878
+ Set to ``True`` if the problem is to be treated as sparse after
879
+ presolve. If either ``A_eq`` or ``A_ub`` is a sparse matrix,
880
+ this option will automatically be set ``True``, and the problem
881
+ will be treated as sparse even during presolve. If your constraint
882
+ matrices contain mostly zeros and the problem is not very small (less
883
+ than about 100 constraints or variables), consider setting ``True``
884
+ or providing ``A_eq`` and ``A_ub`` as sparse matrices.
885
+ lstsq : bool (default = False)
886
+ Set to ``True`` if the problem is expected to be very poorly
887
+ conditioned. This should always be left ``False`` unless severe
888
+ numerical difficulties are encountered. Leave this at the default
889
+ unless you receive a warning message suggesting otherwise.
890
+ sym_pos : bool (default = True)
891
+ Leave ``True`` if the problem is expected to yield a well conditioned
892
+ symmetric positive definite normal equation matrix
893
+ (almost always). Leave this at the default unless you receive
894
+ a warning message suggesting otherwise.
895
+ cholesky : bool (default = True)
896
+ Set to ``True`` if the normal equations are to be solved by explicit
897
+ Cholesky decomposition followed by explicit forward/backward
898
+ substitution. This is typically faster for problems
899
+ that are numerically well-behaved.
900
+ pc : bool (default = True)
901
+ Leave ``True`` if the predictor-corrector method of Mehrotra is to be
902
+ used. This is almost always (if not always) beneficial.
903
+ ip : bool (default = False)
904
+ Set to ``True`` if the improved initial point suggestion due to [4]_
905
+ Section 4.3 is desired. Whether this is beneficial or not
906
+ depends on the problem.
907
+ permc_spec : str (default = 'MMD_AT_PLUS_A')
908
+ (Has effect only with ``sparse = True``, ``lstsq = False``, ``sym_pos =
909
+ True``, and no SuiteSparse.)
910
+ A matrix is factorized in each iteration of the algorithm.
911
+ This option specifies how to permute the columns of the matrix for
912
+ sparsity preservation. Acceptable values are:
913
+
914
+ - ``NATURAL``: natural ordering.
915
+ - ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
916
+ - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
917
+ - ``COLAMD``: approximate minimum degree column ordering.
918
+
919
+ This option can impact the convergence of the
920
+ interior point algorithm; test different values to determine which
921
+ performs best for your problem. For more information, refer to
922
+ ``scipy.sparse.linalg.splu``.
923
+ unknown_options : dict
924
+ Optional arguments not used by this particular solver. If
925
+ `unknown_options` is non-empty a warning is issued listing all
926
+ unused options.
927
+
928
+ Returns
929
+ -------
930
+ x : 1-D array
931
+ Solution vector.
932
+ status : int
933
+ An integer representing the exit status of the optimization::
934
+
935
+ 0 : Optimization terminated successfully
936
+ 1 : Iteration limit reached
937
+ 2 : Problem appears to be infeasible
938
+ 3 : Problem appears to be unbounded
939
+ 4 : Serious numerical difficulties encountered
940
+
941
+ message : str
942
+ A string descriptor of the exit status of the optimization.
943
+ iteration : int
944
+ The number of iterations taken to solve the problem.
945
+
946
+ Notes
947
+ -----
948
+ This method implements the algorithm outlined in [4]_ with ideas from [8]_
949
+ and a structure inspired by the simpler methods of [6]_.
950
+
951
+ The primal-dual path following method begins with initial 'guesses' of
952
+ the primal and dual variables of the standard form problem and iteratively
953
+ attempts to solve the (nonlinear) Karush-Kuhn-Tucker conditions for the
954
+ problem with a gradually reduced logarithmic barrier term added to the
955
+ objective. This particular implementation uses a homogeneous self-dual
956
+ formulation, which provides certificates of infeasibility or unboundedness
957
+ where applicable.
958
+
959
+ The default initial point for the primal and dual variables is that
960
+ defined in [4]_ Section 4.4 Equation 8.22. Optionally (by setting initial
961
+ point option ``ip=True``), an alternate (potentially improved) starting
962
+ point can be calculated according to the additional recommendations of
963
+ [4]_ Section 4.4.
964
+
965
+ A search direction is calculated using the predictor-corrector method
966
+ (single correction) proposed by Mehrota and detailed in [4]_ Section 4.1.
967
+ (A potential improvement would be to implement the method of multiple
968
+ corrections described in [4]_ Section 4.2.) In practice, this is
969
+ accomplished by solving the normal equations, [4]_ Section 5.1 Equations
970
+ 8.31 and 8.32, derived from the Newton equations [4]_ Section 5 Equations
971
+ 8.25 (compare to [4]_ Section 4 Equations 8.6-8.8). The advantage of
972
+ solving the normal equations rather than 8.25 directly is that the
973
+ matrices involved are symmetric positive definite, so Cholesky
974
+ decomposition can be used rather than the more expensive LU factorization.
975
+
976
+ With default options, the solver used to perform the factorization depends
977
+ on third-party software availability and the conditioning of the problem.
978
+
979
+ For dense problems, solvers are tried in the following order:
980
+
981
+ 1. ``scipy.linalg.cho_factor``
982
+
983
+ 2. ``scipy.linalg.solve`` with option ``sym_pos=True``
984
+
985
+ 3. ``scipy.linalg.solve`` with option ``sym_pos=False``
986
+
987
+ 4. ``scipy.linalg.lstsq``
988
+
989
+ For sparse problems:
990
+
991
+ 1. ``sksparse.cholmod.cholesky`` (if scikit-sparse and SuiteSparse are installed)
992
+
993
+ 2. ``scipy.sparse.linalg.factorized``
994
+ (if scikit-umfpack and SuiteSparse are installed)
995
+
996
+ 3. ``scipy.sparse.linalg.splu`` (which uses SuperLU distributed with SciPy)
997
+
998
+ 4. ``scipy.sparse.linalg.lsqr``
999
+
1000
+ If the solver fails for any reason, successively more robust (but slower)
1001
+ solvers are attempted in the order indicated. Attempting, failing, and
1002
+ re-starting factorization can be time consuming, so if the problem is
1003
+ numerically challenging, options can be set to bypass solvers that are
1004
+ failing. Setting ``cholesky=False`` skips to solver 2,
1005
+ ``sym_pos=False`` skips to solver 3, and ``lstsq=True`` skips
1006
+ to solver 4 for both sparse and dense problems.
1007
+
1008
+ Potential improvements for combatting issues associated with dense
1009
+ columns in otherwise sparse problems are outlined in [4]_ Section 5.3 and
1010
+ [10]_ Section 4.1-4.2; the latter also discusses the alleviation of
1011
+ accuracy issues associated with the substitution approach to free
1012
+ variables.
1013
+
1014
+ After calculating the search direction, the maximum possible step size
1015
+ that does not activate the non-negativity constraints is calculated, and
1016
+ the smaller of this step size and unity is applied (as in [4]_ Section
1017
+ 4.1.) [4]_ Section 4.3 suggests improvements for choosing the step size.
1018
+
1019
+ The new point is tested according to the termination conditions of [4]_
1020
+ Section 4.5. The same tolerance, which can be set using the ``tol`` option,
1021
+ is used for all checks. (A potential improvement would be to expose
1022
+ the different tolerances to be set independently.) If optimality,
1023
+ unboundedness, or infeasibility is detected, the solve procedure
1024
+ terminates; otherwise it repeats.
1025
+
1026
+ The expected problem formulation differs between the top level ``linprog``
1027
+ module and the method-specific solvers. The method-specific solvers expect a
1028
+ problem in standard form:
1029
+
1030
+ Minimize::
1031
+
1032
+ c @ x
1033
+
1034
+ Subject to::
1035
+
1036
+ A @ x == b
1037
+ x >= 0
1038
+
1039
+ Whereas the top level ``linprog`` module expects a problem of form:
1040
+
1041
+ Minimize::
1042
+
1043
+ c @ x
1044
+
1045
+ Subject to::
1046
+
1047
+ A_ub @ x <= b_ub
1048
+ A_eq @ x == b_eq
1049
+ lb <= x <= ub
1050
+
1051
+ where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
1052
+
1053
+ The original problem contains equality, upper-bound and variable constraints
1054
+ whereas the method-specific solver requires equality constraints and
1055
+ variable non-negativity.
1056
+
1057
+ The ``linprog`` module converts the original problem to standard form by
1058
+ converting the simple bounds to upper bound constraints, introducing
1059
+ non-negative slack variables for inequality constraints, and expressing
1060
+ unbounded variables as the difference between two non-negative variables.
1061
+
1062
+
1063
+ References
1064
+ ----------
1065
+ .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
1066
+ optimizer for linear programming: an implementation of the
1067
+ homogeneous algorithm." High performance optimization. Springer US,
1068
+ 2000. 197-232.
1069
+ .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
1070
+ Programming based on Newton's Method." Unpublished Course Notes,
1071
+ March 2004. Available 2/25/2017 at
1072
+ https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
1073
+ .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
1074
+ programming." Mathematical Programming 71.2 (1995): 221-245.
1075
+ .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
1076
+ programming." Athena Scientific 1 (1997): 997.
1077
+ .. [10] Andersen, Erling D., et al. Implementation of interior point methods
1078
+ for large scale linear programming. HEC/Universite de Geneve, 1996.
1079
+
1080
+ """
1081
+
1082
+ _check_unknown_options(unknown_options)
1083
+
1084
+ # These should be warnings, not errors
1085
+ if (cholesky or cholesky is None) and sparse and not has_cholmod:
1086
+ if cholesky:
1087
+ warn("Sparse cholesky is only available with scikit-sparse. "
1088
+ "Setting `cholesky = False`",
1089
+ OptimizeWarning, stacklevel=3)
1090
+ cholesky = False
1091
+
1092
+ if sparse and lstsq:
1093
+ warn("Option combination 'sparse':True and 'lstsq':True "
1094
+ "is not recommended.",
1095
+ OptimizeWarning, stacklevel=3)
1096
+
1097
+ if lstsq and cholesky:
1098
+ warn("Invalid option combination 'lstsq':True "
1099
+ "and 'cholesky':True; option 'cholesky' has no effect when "
1100
+ "'lstsq' is set True.",
1101
+ OptimizeWarning, stacklevel=3)
1102
+
1103
+ valid_permc_spec = ('NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', 'COLAMD')
1104
+ if permc_spec.upper() not in valid_permc_spec:
1105
+ warn("Invalid permc_spec option: '" + str(permc_spec) + "'. "
1106
+ "Acceptable values are 'NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', "
1107
+ "and 'COLAMD'. Reverting to default.",
1108
+ OptimizeWarning, stacklevel=3)
1109
+ permc_spec = 'MMD_AT_PLUS_A'
1110
+
1111
+ # This can be an error
1112
+ if not sym_pos and cholesky:
1113
+ raise ValueError(
1114
+ "Invalid option combination 'sym_pos':False "
1115
+ "and 'cholesky':True: Cholesky decomposition is only possible "
1116
+ "for symmetric positive definite matrices.")
1117
+
1118
+ cholesky = cholesky or (cholesky is None and sym_pos and not lstsq)
1119
+
1120
+ x, status, message, iteration = _ip_hsd(A, b, c, c0, alpha0, beta,
1121
+ maxiter, disp, tol, sparse,
1122
+ lstsq, sym_pos, cholesky,
1123
+ pc, ip, permc_spec, callback,
1124
+ postsolve_args)
1125
+
1126
+ return x, status, message, iteration
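# --- Editor's sketch (not part of the SciPy source): this private solver is
# normally reached through the public interface, which performs the
# bounds -> slack -> standard-form conversion described in the Notes above.
# 'interior-point' is the method name under which this code shipped; later
# SciPy releases deprecated it in favor of 'highs'.
from scipy.optimize import linprog

c = [-1, 4]
A_ub = [[-3, 1], [1, 2]]
b_ub = [6, 4]
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(None, None), (-3, None)],
              method='interior-point')
print(res.status, res.x)   # 0 and approximately [10., -3.]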
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_rs.py ADDED
@@ -0,0 +1,572 @@
1
+ """Revised simplex method for linear programming
2
+
3
+ The *revised simplex* method uses the method described in [1]_, except
4
+ that a factorization [2]_ of the basis matrix, rather than its inverse,
5
+ is efficiently maintained and used to solve the linear systems at each
6
+ iteration of the algorithm.
7
+
8
+ .. versionadded:: 1.3.0
9
+
10
+ References
11
+ ----------
12
+ .. [1] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
13
+ programming." Athena Scientific 1 (1997): 997.
14
+ .. [2] Bartels, Richard H. "A stabilization of the simplex method."
15
+ Journal in Numerische Mathematik 16.5 (1971): 414-434.
16
+
17
+ """
18
+ # Author: Matt Haberland
19
+
20
+ import numpy as np
21
+ from numpy.linalg import LinAlgError
22
+
23
+ from scipy.linalg import solve
24
+ from ._optimize import _check_unknown_options
25
+ from ._bglu_dense import LU
26
+ from ._bglu_dense import BGLU as BGLU
27
+ from ._linprog_util import _postsolve
28
+ from ._optimize import OptimizeResult
29
+
30
+
31
+ def _phase_one(A, b, x0, callback, postsolve_args, maxiter, tol, disp,
32
+ maxupdate, mast, pivot):
33
+ """
34
+ The purpose of phase one is to find an initial basic feasible solution
35
+ (BFS) to the original problem.
36
+
37
+ Generates an auxiliary problem with a trivial BFS and an objective that
38
+ minimizes infeasibility of the original problem. Solves the auxiliary
39
+ problem using the main simplex routine (phase two). This either yields
40
+ a BFS to the original problem or determines that the original problem is
41
+ infeasible. If feasible, phase one detects redundant rows in the original
42
+ constraint matrix and removes them, then chooses additional indices as
43
+ necessary to complete a basis/BFS for the original problem.
44
+ """
45
+
46
+ m, n = A.shape
47
+ status = 0
48
+
49
+ # generate auxiliary problem to get initial BFS
50
+ A, b, c, basis, x, status = _generate_auxiliary_problem(A, b, x0, tol)
51
+
52
+ if status == 6:
53
+ residual = c.dot(x)
54
+ iter_k = 0
55
+ return x, basis, A, b, residual, status, iter_k
56
+
57
+ # solve auxiliary problem
58
+ phase_one_n = n
59
+ iter_k = 0
60
+ x, basis, status, iter_k = _phase_two(c, A, x, basis, callback,
61
+ postsolve_args,
62
+ maxiter, tol, disp,
63
+ maxupdate, mast, pivot,
64
+ iter_k, phase_one_n)
65
+
66
+ # check for infeasibility
67
+ residual = c.dot(x)
68
+ if status == 0 and residual > tol:
69
+ status = 2
70
+
71
+ # drive artificial variables out of basis
72
+ # TODO: test redundant row removal better
73
+ # TODO: make solve more efficient with BGLU? This could take a while.
74
+ keep_rows = np.ones(m, dtype=bool)
75
+ for basis_column in basis[basis >= n]:
76
+ B = A[:, basis]
77
+ try:
78
+ basis_finder = np.abs(solve(B, A)) # inefficient
79
+ pertinent_row = np.argmax(basis_finder[:, basis_column])
80
+ eligible_columns = np.ones(n, dtype=bool)
81
+ eligible_columns[basis[basis < n]] = 0
82
+ eligible_column_indices = np.where(eligible_columns)[0]
83
+ index = np.argmax(basis_finder[:, :n]
84
+ [pertinent_row, eligible_columns])
85
+ new_basis_column = eligible_column_indices[index]
86
+ if basis_finder[pertinent_row, new_basis_column] < tol:
87
+ keep_rows[pertinent_row] = False
88
+ else:
89
+ basis[basis == basis_column] = new_basis_column
90
+ except LinAlgError:
91
+ status = 4
92
+
93
+ # form solution to original problem
94
+ A = A[keep_rows, :n]
95
+ basis = basis[keep_rows]
96
+ x = x[:n]
97
+ m = A.shape[0]
98
+ return x, basis, A, b, residual, status, iter_k
99
+
100
+
101
+ def _get_more_basis_columns(A, basis):
102
+ """
103
+ Called when the auxiliary problem terminates with artificial columns in
104
+ the basis, which must be removed and replaced with non-artificial
105
+ columns. Finds additional columns that do not make the matrix singular.
106
+ """
107
+ m, n = A.shape
108
+
109
+ # options for inclusion are those that aren't already in the basis
110
+ a = np.arange(m+n)
111
+ bl = np.zeros(len(a), dtype=bool)
112
+ bl[basis] = 1
113
+ options = a[~bl]
114
+ options = options[options < n] # and they have to be non-artificial
115
+
116
+ # form basis matrix
117
+ B = np.zeros((m, m))
118
+ B[:, 0:len(basis)] = A[:, basis]
119
+
120
+ if (basis.size > 0 and
121
+ np.linalg.matrix_rank(B[:, :len(basis)]) < len(basis)):
122
+ raise Exception("Basis has dependent columns")
123
+
124
+ rank = 0 # just enter the loop
125
+ for i in range(n): # somewhat arbitrary, but we need another way out
126
+ # permute the options, and take as many as needed
127
+ new_basis = np.random.permutation(options)[:m-len(basis)]
128
+ B[:, len(basis):] = A[:, new_basis] # update the basis matrix
129
+ rank = np.linalg.matrix_rank(B) # check the rank
130
+ if rank == m:
131
+ break
132
+
133
+ return np.concatenate((basis, new_basis))
134
+
135
+
136
+ def _generate_auxiliary_problem(A, b, x0, tol):
137
+ """
138
+ Modifies original problem to create an auxiliary problem with a trivial
139
+ initial basic feasible solution and an objective that minimizes
140
+ infeasibility in the original problem.
141
+
142
+ Conceptually, this is done by stacking an identity matrix on the right of
143
+ the original constraint matrix, adding artificial variables to correspond
144
+ with each of these new columns, and generating a cost vector that is all
145
+ zeros except for ones corresponding with each of the new variables.
146
+
147
+ A initial basic feasible solution is trivial: all variables are zero
148
+ except for the artificial variables, which are set equal to the
149
+ corresponding element of the right hand side `b`.
150
+
151
+ Running the simplex method on this auxiliary problem drives all of the
152
+ artificial variables - and thus the cost - to zero if the original problem
153
+ is feasible. The original problem is declared infeasible otherwise.
154
+
155
+ Much of the complexity below is to improve efficiency by using singleton
156
+ columns in the original problem where possible, thus generating artificial
157
+ variables only as necessary, and using an initial 'guess' basic feasible
158
+ solution.
159
+ """
160
+ status = 0
161
+ m, n = A.shape
162
+
163
+ if x0 is not None:
164
+ x = x0
165
+ else:
166
+ x = np.zeros(n)
167
+
168
+ r = b - A@x # residual; this must be all zeros for feasibility
169
+
170
+ A[r < 0] = -A[r < 0] # express problem with RHS positive for trivial BFS
171
+ b[r < 0] = -b[r < 0] # to the auxiliary problem
172
+ r[r < 0] *= -1
173
+
174
+ # Rows which we will need to find a trivial way to zero.
175
+ # This should just be the rows where there is a nonzero residual.
176
+ # But then we would not necessarily have a column singleton in every row.
177
+ # This makes it difficult to find an initial basis.
178
+ if x0 is None:
179
+ nonzero_constraints = np.arange(m)
180
+ else:
181
+ nonzero_constraints = np.where(r > tol)[0]
182
+
183
+ # these are (at least some of) the initial basis columns
184
+ basis = np.where(np.abs(x) > tol)[0]
185
+
186
+ if len(nonzero_constraints) == 0 and len(basis) <= m: # already a BFS
187
+ c = np.zeros(n)
188
+ basis = _get_more_basis_columns(A, basis)
189
+ return A, b, c, basis, x, status
190
+ elif (len(nonzero_constraints) > m - len(basis) or
191
+ np.any(x < 0)): # can't get trivial BFS
192
+ c = np.zeros(n)
193
+ status = 6
194
+ return A, b, c, basis, x, status
195
+
196
+ # chooses existing columns appropriate for inclusion in initial basis
197
+ cols, rows = _select_singleton_columns(A, r)
198
+
199
+ # find the rows we need to zero that we _can_ zero with column singletons
200
+ i_tofix = np.isin(rows, nonzero_constraints)
201
+ # these columns can't already be in the basis, though
202
+ # we are going to add them to the basis and change the corresponding x val
203
+ i_notinbasis = np.logical_not(np.isin(cols, basis))
204
+ i_fix_without_aux = np.logical_and(i_tofix, i_notinbasis)
205
+ rows = rows[i_fix_without_aux]
206
+ cols = cols[i_fix_without_aux]
207
+
208
+ # indices of the rows we can only zero with auxiliary variable
209
+ # these rows will get a one in each auxiliary column
210
+ arows = nonzero_constraints[np.logical_not(
211
+ np.isin(nonzero_constraints, rows))]
212
+ n_aux = len(arows)
213
+ acols = n + np.arange(n_aux) # indices of auxiliary columns
214
+
215
+ basis_ng = np.concatenate((cols, acols)) # basis columns not from guess
216
+ basis_ng_rows = np.concatenate((rows, arows)) # rows we need to zero
217
+
218
+ # add auxiliary singleton columns
219
+ A = np.hstack((A, np.zeros((m, n_aux))))
220
+ A[arows, acols] = 1
221
+
222
+ # generate initial BFS
223
+ x = np.concatenate((x, np.zeros(n_aux)))
224
+ x[basis_ng] = r[basis_ng_rows]/A[basis_ng_rows, basis_ng]
225
+
226
+ # generate costs to minimize infeasibility
227
+ c = np.zeros(n_aux + n)
228
+ c[acols] = 1
229
+
230
+ # basis columns correspond with nonzeros in guess, those with column
231
+ # singletons we used to zero remaining constraints, and any additional
232
+ # columns to get a full set (m columns)
233
+ basis = np.concatenate((basis, basis_ng))
234
+ basis = _get_more_basis_columns(A, basis) # add columns as needed
235
+
236
+ return A, b, c, basis, x, status
237
+
238
+
239
+ def _select_singleton_columns(A, b):
240
+ """
241
+ Finds singleton columns for which the singleton entry is of the same sign
242
+ as the right-hand side; these columns are eligible for inclusion in an
243
+ initial basis. Determines the rows in which the singleton entries are
244
+ located. For each of these rows, returns the indices of the one singleton
245
+ column and its corresponding row.
246
+ """
247
+ # find indices of all singleton columns and corresponding row indices
248
+ column_indices = np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
249
+ columns = A[:, column_indices] # array of singleton columns
250
+ row_indices = np.zeros(len(column_indices), dtype=int)
251
+ nonzero_rows, nonzero_columns = np.nonzero(columns)
252
+ row_indices[nonzero_columns] = nonzero_rows # corresponding row indices
253
+
254
+ # keep only singletons with entries that have same sign as RHS
255
+ # this is necessary because all elements of BFS must be non-negative
256
+ same_sign = A[row_indices, column_indices]*b[row_indices] >= 0
257
+ column_indices = column_indices[same_sign][::-1]
258
+ row_indices = row_indices[same_sign][::-1]
259
+ # Reversing the order so that steps below select rightmost columns
260
+ # for initial basis, which will tend to be slack variables. (If the
261
+ # guess corresponds with a basic feasible solution but a constraint
262
+ # is not satisfied with the corresponding slack variable zero, the slack
263
+ # variable must be basic.)
264
+
265
+ # for each row, keep rightmost singleton column with an entry in that row
266
+ unique_row_indices, first_columns = np.unique(row_indices,
267
+ return_index=True)
268
+ return column_indices[first_columns], unique_row_indices
269
+
270
+
271
+ def _find_nonzero_rows(A, tol):
272
+ """
273
+ Returns logical array indicating the locations of rows with at least
274
+ one nonzero element.
275
+ """
276
+ return np.any(np.abs(A) > tol, axis=1)
277
+
278
+
279
+ def _select_enter_pivot(c_hat, bl, a, rule="bland", tol=1e-12):
280
+ """
281
+ Selects a pivot to enter the basis. Currently Bland's rule - the smallest
282
+ index that has a negative reduced cost - is the default.
283
+ """
284
+ if rule.lower() == "mrc": # index with minimum reduced cost
285
+ return a[~bl][np.argmin(c_hat)]
286
+ else: # smallest index w/ negative reduced cost
287
+ return a[~bl][c_hat < -tol][0]
288
+
289
+
290
+ def _display_iter(phase, iteration, slack, con, fun):
291
+ """
292
+ Print indicators of optimization status to the console.
293
+ """
294
+ header = True if not iteration % 20 else False
295
+
296
+ if header:
297
+ print("Phase",
298
+ "Iteration",
299
+ "Minimum Slack ",
300
+ "Constraint Residual",
301
+ "Objective ")
302
+
303
+ # :<X.Y left aligns Y digits in X digit spaces
304
+ fmt = '{0:<6}{1:<10}{2:<20.13}{3:<20.13}{4:<20.13}'
305
+ try:
306
+ slack = np.min(slack)
307
+ except ValueError:
308
+ slack = "NA"
309
+ print(fmt.format(phase, iteration, slack, np.linalg.norm(con), fun))
310
+
311
+
312
+ def _display_and_callback(phase_one_n, x, postsolve_args, status,
313
+ iteration, disp, callback):
314
+ if phase_one_n is not None:
315
+ phase = 1
316
+ x_postsolve = x[:phase_one_n]
317
+ else:
318
+ phase = 2
319
+ x_postsolve = x
320
+ x_o, fun, slack, con = _postsolve(x_postsolve,
321
+ postsolve_args)
322
+
323
+ if callback is not None:
324
+ res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
325
+ 'con': con, 'nit': iteration,
326
+ 'phase': phase, 'complete': False,
327
+ 'status': status, 'message': "",
328
+ 'success': False})
329
+ callback(res)
330
+ if disp:
331
+ _display_iter(phase, iteration, slack, con, fun)
332
+
333
+
334
+ def _phase_two(c, A, x, b, callback, postsolve_args, maxiter, tol, disp,
335
+ maxupdate, mast, pivot, iteration=0, phase_one_n=None):
336
+ """
337
+ The heart of the simplex method. Beginning with a basic feasible solution,
338
+ moves to adjacent basic feasible solutions successively lower reduced cost.
339
+ Terminates when there are no basic feasible solutions with lower reduced
340
+ cost or if the problem is determined to be unbounded.
341
+
342
+ This implementation follows the revised simplex method based on LU
343
+ decomposition. Rather than maintaining a tableau or an inverse of the
344
+ basis matrix, we keep a factorization of the basis matrix that allows
345
+ efficient solution of linear systems while avoiding stability issues
346
+ associated with inverted matrices.
347
+ """
348
+ m, n = A.shape
349
+ status = 0
350
+ a = np.arange(n) # indices of columns of A
351
+ ab = np.arange(m) # indices of columns of B
352
+ if maxupdate:
353
+ # basis matrix factorization object; similar to B = A[:, b]
354
+ B = BGLU(A, b, maxupdate, mast)
355
+ else:
356
+ B = LU(A, b)
357
+
358
+ for iteration in range(iteration, maxiter):
359
+
360
+ if disp or callback is not None:
361
+ _display_and_callback(phase_one_n, x, postsolve_args, status,
362
+ iteration, disp, callback)
363
+
364
+ bl = np.zeros(len(a), dtype=bool)
365
+ bl[b] = 1
366
+
367
+ xb = x[b] # basic variables
368
+ cb = c[b] # basic costs
369
+
370
+ try:
371
+ v = B.solve(cb, transposed=True) # similar to v = solve(B.T, cb)
372
+ except LinAlgError:
373
+ status = 4
374
+ break
375
+
376
+ # TODO: cythonize?
377
+ c_hat = c - v.dot(A) # reduced cost
378
+ c_hat = c_hat[~bl]
379
+ # Above is much faster than:
380
+ # N = A[:, ~bl] # slow!
381
+ # c_hat = c[~bl] - v.T.dot(N)
382
+ # Can we perform the multiplication only on the nonbasic columns?
383
+
384
+ if np.all(c_hat >= -tol): # all reduced costs positive -> terminate
385
+ break
386
+
387
+ j = _select_enter_pivot(c_hat, bl, a, rule=pivot, tol=tol)
388
+ u = B.solve(A[:, j]) # similar to u = solve(B, A[:, j])
389
+
390
+ i = u > tol # if none of the u are positive, unbounded
391
+ if not np.any(i):
392
+ status = 3
393
+ break
394
+
395
+ th = xb[i]/u[i]
396
+ l = np.argmin(th) # implicitly selects smallest subscript
397
+ th_star = th[l] # step size
398
+
399
+ x[b] = x[b] - th_star*u # take step
400
+ x[j] = th_star
401
+ B.update(ab[i][l], j) # modify basis
402
+ b = B.b # similar to b[ab[i][l]] =
403
+
404
+ else:
405
+ # If the end of the for loop is reached (without a break statement),
406
+ # then another step has been taken, so the iteration counter should
407
+ # increment, info should be displayed, and callback should be called.
408
+ iteration += 1
409
+ status = 1
410
+ if disp or callback is not None:
411
+ _display_and_callback(phase_one_n, x, postsolve_args, status,
412
+ iteration, disp, callback)
413
+
414
+ return x, b, status, iteration
415
+
416
+
417
+ def _linprog_rs(c, c0, A, b, x0, callback, postsolve_args,
+                 maxiter=5000, tol=1e-12, disp=False,
+                 maxupdate=10, mast=False, pivot="mrc",
+                 **unknown_options):
+     """
+     Solve the following linear programming problem via a two-phase
+     revised simplex algorithm::
+
+         minimize:     c @ x
+
+         subject to:   A @ x == b
+                       0 <= x < oo
+
+     User-facing documentation is in _linprog_doc.py.
+
+     Parameters
+     ----------
+     c : 1-D array
+         Coefficients of the linear objective function to be minimized.
+     c0 : float
+         Constant term in objective function due to fixed (and eliminated)
+         variables. (Currently unused.)
+     A : 2-D array
+         2-D array which, when matrix-multiplied by ``x``, gives the values of
+         the equality constraints at ``x``.
+     b : 1-D array
+         1-D array of values representing the RHS of each equality constraint
+         (row) in ``A``.
+     x0 : 1-D array, optional
+         Starting values of the independent variables, which will be refined by
+         the optimization algorithm. For the revised simplex method, these must
+         correspond with a basic feasible solution.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function ``c @ x``.
+             success : bool
+                 True only when an algorithm has completed successfully,
+                 so this is always False as the callback function is called
+                 only while the algorithm is still iterating.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the algorithm being executed.
+             status : int
+                 For revised simplex, this is always 0 because if a different
+                 status is detected, the algorithm terminates.
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform in either phase.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     disp : bool
+         Set to ``True`` if indicators of optimization status are to be printed
+         to the console each iteration.
+     maxupdate : int
+         The maximum number of updates performed on the LU factorization.
+         Once this limit is reached, the basis matrix is factorized from
+         scratch.
+     mast : bool
+         Minimize Amortized Solve Time. If enabled, the average time to solve
+         a linear system using the basis factorization is measured. Typically,
+         the average solve time will decrease with each successive solve after
+         initial factorization, as factorization takes much more time than the
+         solve operation (and updates). Eventually, however, the updated
+         factorization becomes sufficiently complex that the average solve time
+         begins to increase. When this is detected, the basis is refactorized
+         from scratch. Enable this option to maximize speed at the risk of
+         nondeterministic behavior. Ignored if ``maxupdate`` is 0.
+     pivot : "mrc" or "bland"
+         Pivot rule: Minimum Reduced Cost (default) or Bland's rule. Choose
+         Bland's rule if the iteration limit is reached and cycling is
+         suspected.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         `unknown_options` is non-empty a warning is issued listing all
+         unused options.
+
+     Returns
+     -------
+     x : 1-D array
+         Solution vector.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Numerical difficulties encountered
+              5 : No constraints; turn presolve on
+              6 : Guess x0 cannot be converted to a basic feasible solution
+
+     message : str
+         A string descriptor of the exit status of the optimization.
+     iteration : int
+         The number of iterations taken to solve the problem.
+     """
+
+     _check_unknown_options(unknown_options)
+
+     messages = ["Optimization terminated successfully.",
+                 "Iteration limit reached.",
+                 "The problem appears infeasible, as the phase one auxiliary "
+                 "problem terminated successfully with a residual of {0:.1e}, "
+                 "greater than the tolerance {1} required for the solution to "
+                 "be considered feasible. Consider increasing the tolerance to "
+                 "be greater than {0:.1e}. If this tolerance is unacceptably "
+                 "large, the problem is likely infeasible.",
+                 "The problem is unbounded, as the simplex algorithm found "
+                 "a basic feasible solution from which there is a direction "
+                 "with negative reduced cost in which all decision variables "
+                 "increase.",
+                 "Numerical difficulties encountered; consider trying "
+                 "method='interior-point'.",
+                 "Problems with no constraints are trivially solved; please "
+                 "turn presolve on.",
+                 "The guess x0 cannot be converted to a basic feasible "
+                 "solution. "
+                 ]
+
+     if A.size == 0:  # address test_unbounded_below_no_presolve_corrected
+         return np.zeros(c.shape), 5, messages[5], 0
+
+     x, basis, A, b, residual, status, iteration = (
+         _phase_one(A, b, x0, callback, postsolve_args,
+                    maxiter, tol, disp, maxupdate, mast, pivot))
+
+     if status == 0:
+         x, basis, status, iteration = _phase_two(c, A, x, basis, callback,
+                                                  postsolve_args,
+                                                  maxiter, tol, disp,
+                                                  maxupdate, mast, pivot,
+                                                  iteration)
+
+     return x, status, messages[status].format(residual, tol), iteration
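For context, this solver is reached from the public API roughly as below. This is a sketch, not a guaranteed invocation: ``method='revised simplex'`` was deprecated in SciPy 1.9 and removed in 1.11, so it assumes a release that still ships it, and the problem data are made up.

    from scipy.optimize import linprog

    # minimize -x - 2y  subject to  x + y <= 4,  x + 3y <= 6,  x, y >= 0
    res = linprog(c=[-1, -2], A_ub=[[1, 1], [1, 3]], b_ub=[4, 6],
                  method='revised simplex',
                  options={'maxupdate': 10, 'pivot': 'mrc'})
    print(res.x, res.fun)   # expected: [3. 1.] -5.0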
emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_linprog_simplex.py ADDED
@@ -0,0 +1,661 @@
+ """Simplex method for linear programming
+
+ The *simplex* method uses a traditional, full-tableau implementation of
+ Dantzig's simplex algorithm [1]_, [2]_ (*not* the Nelder-Mead simplex).
+ This algorithm is included for backwards compatibility and educational
+ purposes.
+
+ .. versionadded:: 0.15.0
+
+ Warnings
+ --------
+
+ The simplex method may encounter numerical difficulties when pivot
+ values are close to the specified tolerance. If encountered, try
+ removing any redundant constraints, changing the pivot strategy to
+ Bland's rule, or increasing the tolerance value.
+
+ Alternatively, more robust methods may be used. See
+ :ref:`'interior-point' <optimize.linprog-interior-point>` and
+ :ref:`'revised simplex' <optimize.linprog-revised_simplex>`.
+
+ References
+ ----------
+ .. [1] Dantzig, George B., Linear programming and extensions. Rand
+        Corporation Research Study Princeton Univ. Press, Princeton, NJ,
+        1963
+ .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+        Mathematical Programming", McGraw-Hill, Chapter 4.
+ """
+
+ import numpy as np
+ from warnings import warn
+ from ._optimize import OptimizeResult, OptimizeWarning, _check_unknown_options
+ from ._linprog_util import _postsolve
+
+
+ def _pivot_col(T, tol=1e-9, bland=False):
+     """
+     Given a linear programming simplex tableau, determine the column
+     of the variable to enter the basis.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     tol : float
+         Elements in the objective row larger than -tol will not be considered
+         for pivoting. Nominally this value is zero, but numerical issues
+         cause a tolerance about zero to be necessary.
+     bland : bool
+         If True, use Bland's rule for selection of the column (select the
+         first column with a negative coefficient in the objective row,
+         regardless of magnitude).
+
+     Returns
+     -------
+     status: bool
+         True if a suitable pivot column was found, otherwise False.
+         A return of False indicates that the linear programming simplex
+         algorithm is complete.
+     col: int
+         The index of the column of the pivot element.
+         If status is False, col will be returned as nan.
+     """
+     ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
+     if ma.count() == 0:
+         return False, np.nan
+     if bland:
+         # ma.mask is sometimes 0d
+         return True, np.nonzero(np.logical_not(np.atleast_1d(ma.mask)))[0][0]
+     return True, np.ma.nonzero(ma == ma.min())[0][0]
+
+
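To make the column-selection rule concrete, here is a small sketch on a hand-made Phase 2 tableau (values invented for illustration; the last row holds the reduced costs):

    import numpy as np

    T = np.array([[1., 0., 2., 4.],
                  [0., 1., 1., 2.],
                  [-2., -3., 0., 0.]])
    _pivot_col(T)               # (True, 1): most negative reduced cost
    _pivot_col(T, bland=True)   # (True, 0): first negative column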
+ def _pivot_row(T, basis, pivcol, phase, tol=1e-9, bland=False):
+     """
+     Given a linear programming simplex tableau, determine the row for the
+     pivot operation.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     basis : array
+         A list of the current basic variables.
+     pivcol : int
+         The index of the pivot column.
+     phase : int
+         The phase of the simplex algorithm (1 or 2).
+     tol : float
+         Elements in the pivot column smaller than tol will not be considered
+         for pivoting. Nominally this value is zero, but numerical issues
+         cause a tolerance about zero to be necessary.
+     bland : bool
+         If True, use Bland's rule for selection of the row (if more than one
+         row can be used, choose the one with the lowest variable index).
+
+     Returns
+     -------
+     status: bool
+         True if a suitable pivot row was found, otherwise False. A return
+         of False indicates that the linear programming problem is unbounded.
+     row: int
+         The index of the row of the pivot element. If status is False, row
+         will be returned as nan.
+     """
+     if phase == 1:
+         k = 2
+     else:
+         k = 1
+     ma = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, pivcol], copy=False)
+     if ma.count() == 0:
+         return False, np.nan
+     mb = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, -1], copy=False)
+     q = mb / ma
+     min_rows = np.ma.nonzero(q == q.min())[0]
+     if bland:
+         return True, min_rows[np.argmin(np.take(basis, min_rows))]
+     return True, min_rows[0]
+
+
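A companion sketch of the minimum-ratio row selection on the same kind of toy tableau (values invented; ``basis`` only influences the result when ``bland=True``):

    import numpy as np

    T = np.array([[2., 1., 4.],
                  [1., 3., 3.],
                  [-1., -1., 0.]])
    basis = np.array([1, 2])                 # hypothetical current basis
    _pivot_row(T, basis, pivcol=0, phase=2)  # (True, 0): ratio 4/2 < 3/1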
+ def _apply_pivot(T, basis, pivrow, pivcol, tol=1e-9):
+     """
+     Pivot the simplex tableau inplace on the element given by
+     (pivrow, pivcol). The entering variable corresponds to the column given
+     by pivcol, forcing the variable basis[pivrow] to leave the basis.
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     basis : 1-D array
+         An array of the indices of the basic variables, such that basis[i]
+         contains the column corresponding to the basic variable for row i.
+         Basis is modified in place by _apply_pivot.
+     pivrow : int
+         Row index of the pivot.
+     pivcol : int
+         Column index of the pivot.
+     """
+     basis[pivrow] = pivcol
+     pivval = T[pivrow, pivcol]
+     T[pivrow] = T[pivrow] / pivval
+     for irow in range(T.shape[0]):
+         if irow != pivrow:
+             T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]
+
+     # The selected pivot should never lead to a pivot value less than the tol.
+     if np.isclose(pivval, tol, atol=0, rtol=1e4):
+         message = (
+             f"The pivot operation produces a pivot value of {pivval: .1e}, "
+             "which is only slightly greater than the specified "
+             f"tolerance {tol: .1e}. This may lead to issues regarding the "
+             "numerical stability of the simplex method. "
+             "Removing redundant constraints, changing the pivot strategy "
+             "via Bland's rule or increasing the tolerance may "
+             "help reduce the issue.")
+         warn(message, OptimizeWarning, stacklevel=5)
+
+
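The pivot itself is a Gauss-Jordan elimination step; a sketch with invented numbers:

    import numpy as np

    T = np.array([[2., 1., 4.],
                  [1., 3., 3.],
                  [-1., -1., 0.]])
    basis = np.array([1, 2])
    _apply_pivot(T, basis, pivrow=0, pivcol=0)
    # Row 0 is scaled so the pivot becomes 1 and column 0 is zeroed out
    # elsewhere: T is now [[1., .5, 2.], [0., 2.5, 1.], [0., -.5, 2.]]
    # and basis[0] == 0.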
+ def _solve_simplex(T, n, basis, callback, postsolve_args,
+                    maxiter=1000, tol=1e-9, phase=2, bland=False, nit0=0,
+                    ):
+     """
+     Solve a linear programming problem in "standard form" using the Simplex
+     Method. Linear programming is intended to solve the following problem
+     form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     Parameters
+     ----------
+     T : 2-D array
+         A 2-D array representing the simplex tableau, T, corresponding to the
+         linear programming problem. It should have the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0]]
+
+         for a Phase 2 problem, or the form:
+
+         [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
+          [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
+          .
+          .
+          .
+          [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
+          [c[0],    c[1],    ..., c[n_total],    0],
+          [c'[0],   c'[1],   ..., c'[n_total],   0]]
+
+         for a Phase 1 problem (a problem in which a basic feasible solution is
+         sought prior to maximizing the actual objective). ``T`` is modified in
+         place by ``_solve_simplex``.
+     n : int
+         The number of true variables in the problem.
+     basis : 1-D array
+         An array of the indices of the basic variables, such that basis[i]
+         contains the column corresponding to the basic variable for row i.
+         Basis is modified in place by _solve_simplex.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback must accept a
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function.
+             success : bool
+                 True only when a phase has completed successfully. This
+                 will be False for most iterations.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the optimization being executed. In phase 1 a
+                 basic feasible solution is sought and ``T`` has an additional
+                 row representing an alternate objective function.
+             status : int
+                 An integer representing the exit status of the optimization::
+
+                      0 : Optimization terminated successfully
+                      1 : Iteration limit reached
+                      2 : Problem appears to be infeasible
+                      3 : Problem appears to be unbounded
+                      4 : Serious numerical difficulties encountered
+
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+     maxiter : int
+         The maximum number of iterations to perform before aborting the
+         optimization.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     phase : int
+         The phase of the optimization being executed. In phase 1 a basic
+         feasible solution is sought and ``T`` has an additional row
+         representing an alternate objective function.
+     bland : bool
+         If True, choose pivots using Bland's rule [3]_. In problems which
+         fail to converge due to cycling, using Bland's rule can provide
+         convergence at the expense of a less optimal path about the simplex.
+     nit0 : int
+         The initial iteration number used to keep an accurate iteration total
+         in a two-phase problem.
+
+     Returns
+     -------
+     nit : int
+         The number of iterations. Used to keep an accurate iteration total
+         in the two-phase problem.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Serious numerical difficulties encountered
+
+     """
+     nit = nit0
+     status = 0
+     message = ''
+     complete = False
+
+     if phase == 1:
+         m = T.shape[1]-2
+     elif phase == 2:
+         m = T.shape[1]-1
+     else:
+         raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")
+
+     if phase == 2:
+         # Check if any artificial variables are still in the basis.
+         # If yes, check if any coefficients from this row and a column
+         # corresponding to one of the non-artificial variables is non-zero.
+         # If found, pivot at this term. If not, start phase 2.
+         # Do this for all artificial variables in the basis.
+         # Ref: "An Introduction to Linear Programming and Game Theory"
+         # by Paul R. Thie, Gerard E. Keough, 3rd Ed,
+         # Chapter 3.7 Redundant Systems (page 102)
+         for pivrow in [row for row in range(basis.size)
+                        if basis[row] > T.shape[1] - 2]:
+             non_zero_row = [col for col in range(T.shape[1] - 1)
+                             if abs(T[pivrow, col]) > tol]
+             if len(non_zero_row) > 0:
+                 pivcol = non_zero_row[0]
+                 _apply_pivot(T, basis, pivrow, pivcol, tol)
+                 nit += 1
+
+     if len(basis[:m]) == 0:
+         solution = np.empty(T.shape[1] - 1, dtype=np.float64)
+     else:
+         solution = np.empty(max(T.shape[1] - 1, max(basis[:m]) + 1),
+                             dtype=np.float64)
+
+     while not complete:
+         # Find the pivot column
+         pivcol_found, pivcol = _pivot_col(T, tol, bland)
+         if not pivcol_found:
+             pivcol = np.nan
+             pivrow = np.nan
+             status = 0
+             complete = True
+         else:
+             # Find the pivot row
+             pivrow_found, pivrow = _pivot_row(T, basis, pivcol, phase, tol,
+                                               bland)
+             if not pivrow_found:
+                 status = 3
+                 complete = True
+
+         if callback is not None:
+             solution[:] = 0
+             solution[basis[:n]] = T[:n, -1]
+             x = solution[:m]
+             x, fun, slack, con = _postsolve(
+                 x, postsolve_args
+             )
+             res = OptimizeResult({
+                 'x': x,
+                 'fun': fun,
+                 'slack': slack,
+                 'con': con,
+                 'status': status,
+                 'message': message,
+                 'nit': nit,
+                 'success': status == 0 and complete,
+                 'phase': phase,
+                 'complete': complete,
+             })
+             callback(res)
+
+         if not complete:
+             if nit >= maxiter:
+                 # Iteration limit exceeded
+                 status = 1
+                 complete = True
+             else:
+                 _apply_pivot(T, basis, pivrow, pivcol, tol)
+                 nit += 1
+     return nit, status
+
+
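Because ``postsolve_args`` is only touched when a callback is supplied, the core loop can be exercised directly on a hand-built Phase 2 tableau. A sketch, not recommended library usage; the problem data are invented, and slack columns are assumed to form the initial basis:

    import numpy as np

    # minimize -x1 - 2*x2  s.t.  x1 + x2 <= 4,  x1 + 3*x2 <= 6, after slacks
    A = np.array([[1., 1., 1., 0.],
                  [1., 3., 0., 1.]])
    b = np.array([4., 6.])
    c = np.array([-1., -2., 0., 0.])
    T = np.vstack((np.hstack((A, b[:, None])), np.hstack((c, [0.]))))
    basis = np.array([2, 3])          # slack columns are the initial basis
    nit, status = _solve_simplex(T, A.shape[0], basis,
                                 callback=None, postsolve_args=None)
    x = np.zeros(4)
    x[basis] = T[:2, -1]              # x1 = 3.0, x2 = 1.0, objective -5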
+ def _linprog_simplex(c, c0, A, b, callback, postsolve_args,
+                      maxiter=1000, tol=1e-9, disp=False, bland=False,
+                      **unknown_options):
+     """
+     Minimize a linear objective function subject to linear equality and
+     non-negativity constraints using the two-phase simplex method.
+     Linear programming is intended to solve problems of the following form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     User-facing documentation is in _linprog_doc.py.
+
+     Parameters
+     ----------
+     c : 1-D array
+         Coefficients of the linear objective function to be minimized.
+     c0 : float
+         Constant term in objective function due to fixed (and eliminated)
+         variables. (Purely for display.)
+     A : 2-D array
+         2-D array such that ``A @ x`` gives the values of the equality
+         constraints at ``x``.
+     b : 1-D array
+         1-D array of values representing the right hand side of each equality
+         constraint (row) in ``A``.
+     callback : callable, optional
+         If a callback function is provided, it will be called within each
+         iteration of the algorithm. The callback function must accept a single
+         `scipy.optimize.OptimizeResult` consisting of the following fields:
+
+             x : 1-D array
+                 Current solution vector.
+             fun : float
+                 Current value of the objective function.
+             success : bool
+                 True when an algorithm has completed successfully.
+             slack : 1-D array
+                 The values of the slack variables. Each slack variable
+                 corresponds to an inequality constraint. If the slack is zero,
+                 the corresponding constraint is active.
+             con : 1-D array
+                 The (nominally zero) residuals of the equality constraints,
+                 that is, ``b - A_eq @ x``.
+             phase : int
+                 The phase of the algorithm being executed.
+             status : int
+                 An integer representing the status of the optimization::
+
+                      0 : Algorithm proceeding nominally
+                      1 : Iteration limit reached
+                      2 : Problem appears to be infeasible
+                      3 : Problem appears to be unbounded
+                      4 : Serious numerical difficulties encountered
+
+             nit : int
+                 The number of iterations performed.
+             message : str
+                 A string descriptor of the exit status of the optimization.
+     postsolve_args : tuple
+         Data needed by _postsolve to convert the solution to the standard-form
+         problem into the solution to the original problem.
+
+     Options
+     -------
+     maxiter : int
+         The maximum number of iterations to perform.
+     disp : bool
+         If True, print exit status message to sys.stdout.
+     tol : float
+         The tolerance which determines when a solution is "close enough" to
+         zero in Phase 1 to be considered a basic feasible solution or close
+         enough to positive to serve as an optimal solution.
+     bland : bool
+         If True, use Bland's anti-cycling rule [3]_ to choose pivots to
+         prevent cycling. If False, choose pivots which should lead to a
+         converged solution more quickly. The latter method is subject to
+         cycling (non-convergence) in rare instances.
+     unknown_options : dict
+         Optional arguments not used by this particular solver. If
+         `unknown_options` is non-empty a warning is issued listing all
+         unused options.
+
+     Returns
+     -------
+     x : 1-D array
+         Solution vector.
+     status : int
+         An integer representing the exit status of the optimization::
+
+              0 : Optimization terminated successfully
+              1 : Iteration limit reached
+              2 : Problem appears to be infeasible
+              3 : Problem appears to be unbounded
+              4 : Serious numerical difficulties encountered
+
+     message : str
+         A string descriptor of the exit status of the optimization.
+     iteration : int
+         The number of iterations taken to solve the problem.
+
+     References
+     ----------
+     .. [1] Dantzig, George B., Linear programming and extensions. Rand
+            Corporation Research Study Princeton Univ. Press, Princeton, NJ,
+            1963
+     .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
+            Mathematical Programming", McGraw-Hill, Chapter 4.
+     .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
+            Mathematics of Operations Research (2), 1977: pp. 103-107.
+
+     Notes
+     -----
+     The expected problem formulation differs between the top level ``linprog``
+     module and the method-specific solvers. The method-specific solvers expect
+     a problem in standard form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A @ x == b
+         x >= 0
+
+     Whereas the top level ``linprog`` module expects a problem of the form:
+
+     Minimize::
+
+         c @ x
+
+     Subject to::
+
+         A_ub @ x <= b_ub
+         A_eq @ x == b_eq
+         lb <= x <= ub
+
+     where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
+
+     The original problem contains equality, upper-bound and variable
+     constraints whereas the method-specific solver requires equality
+     constraints and variable non-negativity.
+
+     The ``linprog`` module converts the original problem to standard form by
+     converting the simple bounds to upper-bound constraints, introducing
+     non-negative slack variables for inequality constraints, and expressing
+     unbounded variables as the difference between two non-negative variables.
+     """
+     _check_unknown_options(unknown_options)
+
+     status = 0
+     messages = {0: "Optimization terminated successfully.",
+                 1: "Iteration limit reached.",
+                 2: "Optimization failed. Unable to find a feasible"
+                    " starting point.",
+                 3: "Optimization failed. The problem appears to be unbounded.",
+                 4: "Optimization failed. Singular matrix encountered."}
+
+     n, m = A.shape
+
+     # All constraints must have b >= 0.
+     is_negative_constraint = np.less(b, 0)
+     A[is_negative_constraint] *= -1
+     b[is_negative_constraint] *= -1
+
+     # As all constraints are equality constraints the artificial variables
+     # will also be basic variables.
+     av = np.arange(n) + m
+     basis = av.copy()
+
+     # Format the phase one tableau by adding artificial variables and stacking
+     # the constraints, the objective row and pseudo-objective row.
+     row_constraints = np.hstack((A, np.eye(n), b[:, np.newaxis]))
+     row_objective = np.hstack((c, np.zeros(n), c0))
+     row_pseudo_objective = -row_constraints.sum(axis=0)
+     row_pseudo_objective[av] = 0
+     T = np.vstack((row_constraints, row_objective, row_pseudo_objective))
+
+     nit1, status = _solve_simplex(T, n, basis, callback=callback,
+                                   postsolve_args=postsolve_args,
+                                   maxiter=maxiter, tol=tol, phase=1,
+                                   bland=bland
+                                   )
+     # if pseudo objective is zero, remove the last row from the tableau and
+     # proceed to phase 2
+     nit2 = nit1
+     if abs(T[-1, -1]) < tol:
+         # Remove the pseudo-objective row from the tableau
+         T = T[:-1, :]
+         # Remove the artificial variable columns from the tableau
+         T = np.delete(T, av, 1)
+     else:
+         # Failure to find a feasible starting point
+         status = 2
+         messages[status] = (
+             "Phase 1 of the simplex method failed to find a feasible "
+             "solution. The pseudo-objective function evaluates to {0:.1e} "
+             "which exceeds the required tolerance of {1} for a solution to be "
+             "considered 'close enough' to zero to be a basic solution. "
+             "Consider increasing the tolerance to be greater than {0:.1e}. "
+             "If this tolerance is unacceptably large the problem may be "
+             "infeasible.".format(abs(T[-1, -1]), tol)
+         )
+
+     if status == 0:
+         # Phase 2
+         nit2, status = _solve_simplex(T, n, basis, callback=callback,
+                                       postsolve_args=postsolve_args,
+                                       maxiter=maxiter, tol=tol, phase=2,
+                                       bland=bland, nit0=nit1
+                                       )
+
+     solution = np.zeros(n + m)
+     solution[basis[:n]] = T[:n, -1]
+     x = solution[:m]
+
+     return x, status, messages[status], int(nit2)
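The corresponding public entry point, sketched with invented data (``method='simplex'`` was deprecated in SciPy 1.9 and removed in 1.11, so this assumes an older release):

    from scipy.optimize import linprog

    res = linprog(c=[-1, -2], A_ub=[[1, 1], [1, 3]], b_ub=[4, 6],
                  method='simplex', options={'bland': True})
    print(res.x, res.fun)   # expected: [3. 1.] -5.0
    # 'bland': True selects the anti-cycling pivot rule described above.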