diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8dfdafa42b40410359d8a2446c07cdb1273904ff
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/__init__.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_basinhopping.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_basinhopping.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e5fb39fedc14ba3b299082803c68cb5d99b75345
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_basinhopping.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_bracket.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_bracket.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dacd7914b7b521bd28e816ecf7c373c448001871
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_bracket.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_chandrupatla.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_chandrupatla.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d51470126385c520a5a8e6d1a291303c6c25663
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_chandrupatla.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyla_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyla_py.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb76e503ae82907eb1c22603145bcf66e29778eb
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyla_py.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyqa_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyqa_py.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8405752de66a6807f8a9b97458ad32c63fad8d38
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_cobyqa_py.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_constraints.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_constraints.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..43c5d1cd19852013124dbe9b9e1b26afcb38a7c0
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_constraints.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dcsrch.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dcsrch.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b6d05c8282f4178ceda1ee2c8b9877051a134fc9
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dcsrch.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiable_functions.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiable_functions.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..35dc64fc02cfc5667b046be57fefc3bb138d0e82
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiable_functions.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentialevolution.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentialevolution.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..702a150d74741f4a0b4579ece678035f1e9a98a8
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentialevolution.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiate.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiate.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a86520666d32aae79735613d025699b4f3e74a0a
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_differentiate.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_direct_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_direct_py.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9310a7da2a6a1bea76b6c66f2bf41ff555d43c0f
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_direct_py.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dual_annealing.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dual_annealing.cpython-310.pyc
new file mode 100644 index 0000000000000000000000000000000000000000..552410fbf443a94955d7600298e6fb49f050b3b9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_dual_annealing.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_hessian_update_strategy.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_hessian_update_strategy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1557ad485c84dff112bfc8cab9135e26ef3d233d Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_hessian_update_strategy.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_isotonic.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_isotonic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31f76e9acd1fbeb9d34ef3eb15871524e3d4c762 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_isotonic.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_lbfgsb_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_lbfgsb_py.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99282b9ea1f3b952c230b16b18223583f3b1174d Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_lbfgsb_py.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linesearch.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linesearch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbd491691e430119ed1d9a596d42c6464feae2ed Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linesearch.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6110e661ca804e44dabdbdfeae349dbdb2b0ff9a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_doc.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_doc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76d4e50c8aac0d0a0d7b720fe4219b5329b0b513 Binary files /dev/null and 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_doc.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_highs.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_highs.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4fbeab003d6fa073796199c9c5b9206df6a74ca Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_highs.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_ip.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_ip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09a856520a0f9ce17d6fdd8529da49cd996314f9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_ip.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_rs.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_rs.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33b237facefdefd36d052844d0b4782ec0545aa8 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_rs.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_simplex.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_simplex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f66e6c68a97469ca96e15695c1320fd1c76b8697 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_simplex.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_util.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4da5bd7aeb001ba8a06c19c774d8d174db6167a2 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_linprog_util.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_milp.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_milp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a6ad27d3a1148fefa6cf45445f9058c530f704c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_milp.cpython-310.pyc differ diff --git 
a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minimize.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minimize.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..514e5e0383a589bd1c8d50fd82d90d7dbf66a379 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minimize.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minpack_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minpack_py.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2fb77f2c07ffbfeb868d7a27afcc25193a11652 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_minpack_py.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nnls.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nnls.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d5a91980eb871a943fe3cbfcbedf0efd22b0a43 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nnls.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nonlin.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nonlin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87841e9657434fd712ad175ab9dfa9eb393dc8a2 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_nonlin.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_numdiff.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_numdiff.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05e4a29754d620b83b43b1ede57ace5aa413a95e Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_numdiff.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_qap.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_qap.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81bce330b128d6a4db0410b7369bca3e5576dfc9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_qap.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_remove_redundancy.cpython-310.pyc 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_remove_redundancy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb1a25fbd98979aa0139a518bb0549ef78d964a8 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_remove_redundancy.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95afe07b0fd58dd871ff8674af9b8c606c47705a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root_scalar.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root_scalar.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..feaf39fce5e2908875c697de6c85c25a6d622972 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_root_scalar.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_shgo.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_shgo.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f7789c4488c7a98b14d204e7471d82cfd68d4d9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_shgo.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_slsqp_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_slsqp_py.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5fd1da9e8c4d965af80111b3debf5a47e308cf6 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_slsqp_py.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_spectral.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_spectral.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51cf34596c10cee500547f4b1bfb098aa51f732a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_spectral.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tnc.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tnc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1328744eb6cab031396a7b80ac2c2a6387bf713 Binary files 
/dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tnc.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cec9d896268bd1d0925a0f8ed9025420a7e4d2a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_dogleg.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_dogleg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcf03f34740915fd8ec3598fc9ca1e4f62d1a8fb Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_dogleg.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_exact.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_exact.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d475f23baa4b7a7666b627b875a558745088cbb4 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_exact.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_krylov.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_krylov.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9887d034b259aff56ad86196f8856ce5c61ffe4b Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_krylov.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_ncg.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_ncg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8b6791c164bebb030f6d7d63a71a4c85a58fee7 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_trustregion_ncg.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tstutils.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tstutils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfb6a16db62d01c3c4798dc3c060df732fb9280c Binary files /dev/null and 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_tstutils.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_zeros_py.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_zeros_py.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8de45fb9d40304c9ef8a6942acf878de402af1bc Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/_zeros_py.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/cobyla.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/cobyla.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e103eee7d23e8e254a36bf195fd4b36ff9fe3f06 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/cobyla.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/lbfgsb.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/lbfgsb.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3da34dbba7bc0b4d1b2b39431e98773287dac52 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/lbfgsb.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/linesearch.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/linesearch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9e334dafceeaf98556aed11ffe8907f09d1807c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/linesearch.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e84b34872a46678e640fa461232984be97407fe Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack2.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03964e372c2038b5ddcdb3664fcc52fc77b0a85e Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/minpack2.cpython-310.pyc differ diff --git 
a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/moduleTNC.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/moduleTNC.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31300c940c1109d5c8af4db21249f0bf3a5b6c91 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/moduleTNC.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/nonlin.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/nonlin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98e5dfbfa7537d6414db28dbcb552bf4f097400a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/nonlin.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/optimize.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/optimize.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c480641fc2210b20642a044ba6aa50443db5425a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/optimize.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/slsqp.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/slsqp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15a983a2fa72542501d345ba8d5e81142c52c29c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/slsqp.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/tnc.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/tnc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..169eb4298ebb0bb9b85185fff18bf91a2fc47ebb Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/tnc.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/zeros.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/zeros.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d6bfaaaa63fa3833900b368355e896b2958d436 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/__pycache__/zeros.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43b2c01c877847bccb4b4df9c75ef020fcbd77a8 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/__pycache__/__init__.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/_highs_constants.cpython-310-x86_64-linux-gnu.so b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/_highs_constants.cpython-310-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b3160b97e9bdd8a9bd31565c57e754e7b580b82f Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/_highs_constants.cpython-310-x86_64-linux-gnu.so differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HConst.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HConst.pxd new file mode 100644 index 0000000000000000000000000000000000000000..503d9e74a2636d2ee192491214102b84b3c67277 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HConst.pxd @@ -0,0 +1,106 @@ +# cython: language_level=3 + +from libcpp cimport bool +from libcpp.string cimport string + +cdef extern from "HConst.h" nogil: + + const int HIGHS_CONST_I_INF "kHighsIInf" + const double HIGHS_CONST_INF "kHighsInf" + const double kHighsTiny + const double kHighsZero + const int kHighsThreadLimit + + cdef enum HighsDebugLevel: + HighsDebugLevel_kHighsDebugLevelNone "kHighsDebugLevelNone" = 0 + HighsDebugLevel_kHighsDebugLevelCheap "kHighsDebugLevelCheap" + HighsDebugLevel_kHighsDebugLevelCostly "kHighsDebugLevelCostly" + HighsDebugLevel_kHighsDebugLevelExpensive "kHighsDebugLevelExpensive" + HighsDebugLevel_kHighsDebugLevelMin "kHighsDebugLevelMin" = HighsDebugLevel_kHighsDebugLevelNone + HighsDebugLevel_kHighsDebugLevelMax "kHighsDebugLevelMax" = HighsDebugLevel_kHighsDebugLevelExpensive + + ctypedef enum HighsModelStatus: + HighsModelStatusNOTSET "HighsModelStatus::kNotset" = 0 + HighsModelStatusLOAD_ERROR "HighsModelStatus::kLoadError" + HighsModelStatusMODEL_ERROR "HighsModelStatus::kModelError" + HighsModelStatusPRESOLVE_ERROR "HighsModelStatus::kPresolveError" + HighsModelStatusSOLVE_ERROR "HighsModelStatus::kSolveError" + HighsModelStatusPOSTSOLVE_ERROR "HighsModelStatus::kPostsolveError" + HighsModelStatusMODEL_EMPTY "HighsModelStatus::kModelEmpty" + HighsModelStatusOPTIMAL "HighsModelStatus::kOptimal" + HighsModelStatusINFEASIBLE "HighsModelStatus::kInfeasible" + HighsModelStatus_UNBOUNDED_OR_INFEASIBLE "HighsModelStatus::kUnboundedOrInfeasible" + HighsModelStatusUNBOUNDED "HighsModelStatus::kUnbounded" + HighsModelStatusREACHED_DUAL_OBJECTIVE_VALUE_UPPER_BOUND "HighsModelStatus::kObjectiveBound" + HighsModelStatusREACHED_OBJECTIVE_TARGET "HighsModelStatus::kObjectiveTarget" + HighsModelStatusREACHED_TIME_LIMIT 
"HighsModelStatus::kTimeLimit" + HighsModelStatusREACHED_ITERATION_LIMIT "HighsModelStatus::kIterationLimit" + HighsModelStatusUNKNOWN "HighsModelStatus::kUnknown" + HighsModelStatusHIGHS_MODEL_STATUS_MIN "HighsModelStatus::kMin" = HighsModelStatusNOTSET + HighsModelStatusHIGHS_MODEL_STATUS_MAX "HighsModelStatus::kMax" = HighsModelStatusUNKNOWN + + cdef enum HighsBasisStatus: + HighsBasisStatusLOWER "HighsBasisStatus::kLower" = 0, # (slack) variable is at its lower bound [including fixed variables] + HighsBasisStatusBASIC "HighsBasisStatus::kBasic" # (slack) variable is basic + HighsBasisStatusUPPER "HighsBasisStatus::kUpper" # (slack) variable is at its upper bound + HighsBasisStatusZERO "HighsBasisStatus::kZero" # free variable is non-basic and set to zero + HighsBasisStatusNONBASIC "HighsBasisStatus::kNonbasic" # nonbasic with no specific bound information - useful for users and postsolve + + cdef enum SolverOption: + SOLVER_OPTION_SIMPLEX "SolverOption::SOLVER_OPTION_SIMPLEX" = -1 + SOLVER_OPTION_CHOOSE "SolverOption::SOLVER_OPTION_CHOOSE" + SOLVER_OPTION_IPM "SolverOption::SOLVER_OPTION_IPM" + + cdef enum PrimalDualStatus: + PrimalDualStatusSTATUS_NOT_SET "PrimalDualStatus::STATUS_NOT_SET" = -1 + PrimalDualStatusSTATUS_MIN "PrimalDualStatus::STATUS_MIN" = PrimalDualStatusSTATUS_NOT_SET + PrimalDualStatusSTATUS_NO_SOLUTION "PrimalDualStatus::STATUS_NO_SOLUTION" + PrimalDualStatusSTATUS_UNKNOWN "PrimalDualStatus::STATUS_UNKNOWN" + PrimalDualStatusSTATUS_INFEASIBLE_POINT "PrimalDualStatus::STATUS_INFEASIBLE_POINT" + PrimalDualStatusSTATUS_FEASIBLE_POINT "PrimalDualStatus::STATUS_FEASIBLE_POINT" + PrimalDualStatusSTATUS_MAX "PrimalDualStatus::STATUS_MAX" = PrimalDualStatusSTATUS_FEASIBLE_POINT + + cdef enum HighsOptionType: + HighsOptionTypeBOOL "HighsOptionType::kBool" = 0 + HighsOptionTypeINT "HighsOptionType::kInt" + HighsOptionTypeDOUBLE "HighsOptionType::kDouble" + HighsOptionTypeSTRING "HighsOptionType::kString" + + # workaround for lack of enum class support in Cython < 3.x + # cdef enum class ObjSense(int): + # ObjSenseMINIMIZE "ObjSense::kMinimize" = 1 + # ObjSenseMAXIMIZE "ObjSense::kMaximize" = -1 + + cdef cppclass ObjSense: + pass + + cdef ObjSense ObjSenseMINIMIZE "ObjSense::kMinimize" + cdef ObjSense ObjSenseMAXIMIZE "ObjSense::kMaximize" + + # cdef enum class MatrixFormat(int): + # MatrixFormatkColwise "MatrixFormat::kColwise" = 1 + # MatrixFormatkRowwise "MatrixFormat::kRowwise" + # MatrixFormatkRowwisePartitioned "MatrixFormat::kRowwisePartitioned" + + cdef cppclass MatrixFormat: + pass + + cdef MatrixFormat MatrixFormatkColwise "MatrixFormat::kColwise" + cdef MatrixFormat MatrixFormatkRowwise "MatrixFormat::kRowwise" + cdef MatrixFormat MatrixFormatkRowwisePartitioned "MatrixFormat::kRowwisePartitioned" + + # cdef enum class HighsVarType(int): + # kContinuous "HighsVarType::kContinuous" + # kInteger "HighsVarType::kInteger" + # kSemiContinuous "HighsVarType::kSemiContinuous" + # kSemiInteger "HighsVarType::kSemiInteger" + # kImplicitInteger "HighsVarType::kImplicitInteger" + + cdef cppclass HighsVarType: + pass + + cdef HighsVarType kContinuous "HighsVarType::kContinuous" + cdef HighsVarType kInteger "HighsVarType::kInteger" + cdef HighsVarType kSemiContinuous "HighsVarType::kSemiContinuous" + cdef HighsVarType kSemiInteger "HighsVarType::kSemiInteger" + cdef HighsVarType kImplicitInteger "HighsVarType::kImplicitInteger" diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/Highs.pxd 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/Highs.pxd new file mode 100644 index 0000000000000000000000000000000000000000..7139908d034127430b81f667548a055404ac033a --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/Highs.pxd @@ -0,0 +1,56 @@ +# cython: language_level=3 + +from libc.stdio cimport FILE + +from libcpp cimport bool +from libcpp.string cimport string + +from .HighsStatus cimport HighsStatus +from .HighsOptions cimport HighsOptions +from .HighsInfo cimport HighsInfo +from .HighsLp cimport ( + HighsLp, + HighsSolution, + HighsBasis, + ObjSense, +) +from .HConst cimport HighsModelStatus + +cdef extern from "Highs.h": + # From HiGHS/src/Highs.h + cdef cppclass Highs: + HighsStatus passHighsOptions(const HighsOptions& options) + HighsStatus passModel(const HighsLp& lp) + HighsStatus run() + HighsStatus setHighsLogfile(FILE* logfile) + HighsStatus setHighsOutput(FILE* output) + HighsStatus writeHighsOptions(const string filename, const bool report_only_non_default_values = true) + + # split up for cython below + #const HighsModelStatus& getModelStatus(const bool scaled_model = False) const + const HighsModelStatus & getModelStatus() const + + const HighsInfo& getHighsInfo "getInfo" () const + string modelStatusToString(const HighsModelStatus model_status) const + #HighsStatus getHighsInfoValue(const string& info, int& value) + HighsStatus getHighsInfoValue(const string& info, double& value) const + const HighsOptions& getHighsOptions() const + + const HighsLp& getLp() const + + HighsStatus writeSolution(const string filename, const bool pretty) const + + HighsStatus setBasis() + const HighsSolution& getSolution() const + const HighsBasis& getBasis() const + + bool changeObjectiveSense(const ObjSense sense) + + HighsStatus setHighsOptionValueBool "setOptionValue" (const string & option, const bool value) + HighsStatus setHighsOptionValueInt "setOptionValue" (const string & option, const int value) + HighsStatus setHighsOptionValueStr "setOptionValue" (const string & option, const string & value) + HighsStatus setHighsOptionValueDbl "setOptionValue" (const string & option, const double value) + + string primalDualStatusToString(const int primal_dual_status) + + void resetGlobalScheduler(bool blocking) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsIO.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsIO.pxd new file mode 100644 index 0000000000000000000000000000000000000000..82b80ae643f10be9c0e40c43f8ff0693c649052c --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsIO.pxd @@ -0,0 +1,20 @@ +# cython: language_level=3 + + +cdef extern from "HighsIO.h" nogil: + # workaround for lack of enum class support in Cython < 3.x + # cdef enum class HighsLogType(int): + # kInfo "HighsLogType::kInfo" = 1 + # kDetailed "HighsLogType::kDetailed" + # kVerbose "HighsLogType::kVerbose" + # kWarning "HighsLogType::kWarning" + # kError "HighsLogType::kError" + + cdef cppclass HighsLogType: + pass + + cdef HighsLogType kInfo "HighsLogType::kInfo" + cdef HighsLogType kDetailed "HighsLogType::kDetailed" + cdef HighsLogType kVerbose "HighsLogType::kVerbose" + cdef HighsLogType kWarning "HighsLogType::kWarning" + cdef HighsLogType 
kError "HighsLogType::kError" diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsInfo.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsInfo.pxd new file mode 100644 index 0000000000000000000000000000000000000000..789b510898967499b1f04129b742cf505f4af75a --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsInfo.pxd @@ -0,0 +1,22 @@ +# cython: language_level=3 + +cdef extern from "HighsInfo.h" nogil: + # From HiGHS/src/lp_data/HighsInfo.h + cdef cppclass HighsInfo: + # Inherited from HighsInfoStruct: + int mip_node_count + int simplex_iteration_count + int ipm_iteration_count + int crossover_iteration_count + int primal_solution_status + int dual_solution_status + int basis_validity + double objective_function_value + double mip_dual_bound + double mip_gap + int num_primal_infeasibilities + double max_primal_infeasibility + double sum_primal_infeasibilities + int num_dual_infeasibilities + double max_dual_infeasibility + double sum_dual_infeasibilities diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLp.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLp.pxd new file mode 100644 index 0000000000000000000000000000000000000000..0944f083743f1c34847c3060278f5b7c40869251 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLp.pxd @@ -0,0 +1,46 @@ +# cython: language_level=3 + +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.vector cimport vector + +from .HConst cimport HighsBasisStatus, ObjSense, HighsVarType +from .HighsSparseMatrix cimport HighsSparseMatrix + + +cdef extern from "HighsLp.h" nogil: + # From HiGHS/src/lp_data/HighsLp.h + cdef cppclass HighsLp: + int num_col_ + int num_row_ + + vector[double] col_cost_ + vector[double] col_lower_ + vector[double] col_upper_ + vector[double] row_lower_ + vector[double] row_upper_ + + HighsSparseMatrix a_matrix_ + + ObjSense sense_ + double offset_ + + string model_name_ + + vector[string] row_names_ + vector[string] col_names_ + + vector[HighsVarType] integrality_ + + bool isMip() const + + cdef cppclass HighsSolution: + vector[double] col_value + vector[double] col_dual + vector[double] row_value + vector[double] row_dual + + cdef cppclass HighsBasis: + bool valid_ + vector[HighsBasisStatus] col_status + vector[HighsBasisStatus] row_status diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLpUtils.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLpUtils.pxd new file mode 100644 index 0000000000000000000000000000000000000000..18ede36c146acb395754fef33e888c7434ed307f --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsLpUtils.pxd @@ -0,0 +1,9 @@ +# cython: language_level=3 + +from .HighsStatus cimport HighsStatus +from .HighsLp cimport HighsLp +from .HighsOptions cimport HighsOptions + +cdef extern from "HighsLpUtils.h" nogil: + # From HiGHS/src/lp_data/HighsLpUtils.h + HighsStatus assessLp(HighsLp& lp, const HighsOptions& options) diff --git 
a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsModelUtils.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsModelUtils.pxd new file mode 100644 index 0000000000000000000000000000000000000000..4fccc2e80046d0cee3011eaef7510802b226a85e --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsModelUtils.pxd @@ -0,0 +1,10 @@ +# cython: language_level=3 + +from libcpp.string cimport string + +from .HConst cimport HighsModelStatus + +cdef extern from "HighsModelUtils.h" nogil: + # From HiGHS/src/lp_data/HighsModelUtils.h + string utilHighsModelStatusToString(const HighsModelStatus model_status) + string utilBasisStatusToString(const int primal_dual_status) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsOptions.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsOptions.pxd new file mode 100644 index 0000000000000000000000000000000000000000..920c10c19e30cad9229ce98bfa1e73970feb9f1e --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsOptions.pxd @@ -0,0 +1,110 @@ +# cython: language_level=3 + +from libc.stdio cimport FILE + +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.vector cimport vector + +from .HConst cimport HighsOptionType + +cdef extern from "HighsOptions.h" nogil: + + cdef cppclass OptionRecord: + HighsOptionType type + string name + string description + bool advanced + + cdef cppclass OptionRecordBool(OptionRecord): + bool* value + bool default_value + + cdef cppclass OptionRecordInt(OptionRecord): + int* value + int lower_bound + int default_value + int upper_bound + + cdef cppclass OptionRecordDouble(OptionRecord): + double* value + double lower_bound + double default_value + double upper_bound + + cdef cppclass OptionRecordString(OptionRecord): + string* value + string default_value + + cdef cppclass HighsOptions: + # From HighsOptionsStruct: + + # Options read from the command line + string model_file + string presolve + string solver + string parallel + double time_limit + string options_file + + # Options read from the file + double infinite_cost + double infinite_bound + double small_matrix_value + double large_matrix_value + double primal_feasibility_tolerance + double dual_feasibility_tolerance + double ipm_optimality_tolerance + double dual_objective_value_upper_bound + int highs_debug_level + int simplex_strategy + int simplex_scale_strategy + int simplex_crash_strategy + int simplex_dual_edge_weight_strategy + int simplex_primal_edge_weight_strategy + int simplex_iteration_limit + int simplex_update_limit + int ipm_iteration_limit + int highs_min_threads + int highs_max_threads + int message_level + string solution_file + bool write_solution_to_file + bool write_solution_pretty + + # Advanced options + bool run_crossover + bool mps_parser_type_free + int keep_n_rows + int allowed_simplex_matrix_scale_factor + int allowed_simplex_cost_scale_factor + int simplex_dualise_strategy + int simplex_permute_strategy + int dual_simplex_cleanup_strategy + int simplex_price_strategy + int dual_chuzc_sort_strategy + bool simplex_initial_condition_check + double simplex_initial_condition_tolerance + double 
dual_steepest_edge_weight_log_error_threshhold + double dual_simplex_cost_perturbation_multiplier + double start_crossover_tolerance + bool less_infeasible_DSE_check + bool less_infeasible_DSE_choose_row + bool use_original_HFactor_logic + + # Options for MIP solver + int mip_max_nodes + int mip_report_level + + # Switch for MIP solver + bool mip + + # Options for HighsPrintMessage and HighsLogMessage + FILE* logfile + FILE* output + int message_level + string solution_file + bool write_solution_to_file + bool write_solution_pretty + + vector[OptionRecord*] records diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsRuntimeOptions.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsRuntimeOptions.pxd new file mode 100644 index 0000000000000000000000000000000000000000..3e227b7a44f797469bab9ad8521c7c0273eca7d2 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsRuntimeOptions.pxd @@ -0,0 +1,9 @@ +# cython: language_level=3 + +from libcpp cimport bool + +from .HighsOptions cimport HighsOptions + +cdef extern from "HighsRuntimeOptions.h" nogil: + # From HiGHS/src/lp_data/HighsRuntimeOptions.h + bool loadOptions(int argc, char** argv, HighsOptions& options) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsStatus.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsStatus.pxd new file mode 100644 index 0000000000000000000000000000000000000000..b47813b5d3917c3734476980e26b532dfc37aac4 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/HighsStatus.pxd @@ -0,0 +1,12 @@ +# cython: language_level=3 + +from libcpp.string cimport string + +cdef extern from "HighsStatus.h" nogil: + ctypedef enum HighsStatus: + HighsStatusError "HighsStatus::kError" = -1 + HighsStatusOK "HighsStatus::kOk" = 0 + HighsStatusWarning "HighsStatus::kWarning" = 1 + + + string highsStatusToString(HighsStatus status) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/SimplexConst.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/SimplexConst.pxd new file mode 100644 index 0000000000000000000000000000000000000000..77e7b96320d6fab81009e6a80e784c722e036b4f --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/SimplexConst.pxd @@ -0,0 +1,95 @@ +# cython: language_level=3 + +from libcpp cimport bool + +cdef extern from "SimplexConst.h" nogil: + + cdef enum SimplexAlgorithm: + PRIMAL "SimplexAlgorithm::kPrimal" = 0 + DUAL "SimplexAlgorithm::kDual" + + cdef enum SimplexStrategy: + SIMPLEX_STRATEGY_MIN "SimplexStrategy::kSimplexStrategyMin" = 0 + SIMPLEX_STRATEGY_CHOOSE "SimplexStrategy::kSimplexStrategyChoose" = SIMPLEX_STRATEGY_MIN + SIMPLEX_STRATEGY_DUAL "SimplexStrategy::kSimplexStrategyDual" + SIMPLEX_STRATEGY_DUAL_PLAIN "SimplexStrategy::kSimplexStrategyDualPlain" = SIMPLEX_STRATEGY_DUAL + SIMPLEX_STRATEGY_DUAL_TASKS "SimplexStrategy::kSimplexStrategyDualTasks" + SIMPLEX_STRATEGY_DUAL_MULTI "SimplexStrategy::kSimplexStrategyDualMulti" + SIMPLEX_STRATEGY_PRIMAL 
"SimplexStrategy::kSimplexStrategyPrimal" + SIMPLEX_STRATEGY_MAX "SimplexStrategy::kSimplexStrategyMax" = SIMPLEX_STRATEGY_PRIMAL + SIMPLEX_STRATEGY_NUM "SimplexStrategy::kSimplexStrategyNum" + + cdef enum SimplexCrashStrategy: + SIMPLEX_CRASH_STRATEGY_MIN "SimplexCrashStrategy::kSimplexCrashStrategyMin" = 0 + SIMPLEX_CRASH_STRATEGY_OFF "SimplexCrashStrategy::kSimplexCrashStrategyOff" = SIMPLEX_CRASH_STRATEGY_MIN + SIMPLEX_CRASH_STRATEGY_LTSSF_K "SimplexCrashStrategy::kSimplexCrashStrategyLtssfK" + SIMPLEX_CRASH_STRATEGY_LTSSF "SimplexCrashStrategy::kSimplexCrashStrategyLtssf" = SIMPLEX_CRASH_STRATEGY_LTSSF_K + SIMPLEX_CRASH_STRATEGY_BIXBY "SimplexCrashStrategy::kSimplexCrashStrategyBixby" + SIMPLEX_CRASH_STRATEGY_LTSSF_PRI "SimplexCrashStrategy::kSimplexCrashStrategyLtssfPri" + SIMPLEX_CRASH_STRATEGY_LTSF_K "SimplexCrashStrategy::kSimplexCrashStrategyLtsfK" + SIMPLEX_CRASH_STRATEGY_LTSF_PRI "SimplexCrashStrategy::kSimplexCrashStrategyLtsfPri" + SIMPLEX_CRASH_STRATEGY_LTSF "SimplexCrashStrategy::kSimplexCrashStrategyLtsf" + SIMPLEX_CRASH_STRATEGY_BIXBY_NO_NONZERO_COL_COSTS "SimplexCrashStrategy::kSimplexCrashStrategyBixbyNoNonzeroColCosts" + SIMPLEX_CRASH_STRATEGY_BASIC "SimplexCrashStrategy::kSimplexCrashStrategyBasic" + SIMPLEX_CRASH_STRATEGY_TEST_SING "SimplexCrashStrategy::kSimplexCrashStrategyTestSing" + SIMPLEX_CRASH_STRATEGY_MAX "SimplexCrashStrategy::kSimplexCrashStrategyMax" = SIMPLEX_CRASH_STRATEGY_TEST_SING + + cdef enum SimplexEdgeWeightStrategy: + SIMPLEX_EDGE_WEIGHT_STRATEGY_MIN "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategyMin" = -1 + SIMPLEX_EDGE_WEIGHT_STRATEGY_CHOOSE "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategyChoose" = SIMPLEX_EDGE_WEIGHT_STRATEGY_MIN + SIMPLEX_EDGE_WEIGHT_STRATEGY_DANTZIG "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategyDantzig" + SIMPLEX_EDGE_WEIGHT_STRATEGY_DEVEX "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategyDevex" + SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategySteepestEdge" + SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE_UNIT_INITIAL "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategySteepestEdgeUnitInitial" + SIMPLEX_EDGE_WEIGHT_STRATEGY_MAX "SimplexEdgeWeightStrategy::kSimplexEdgeWeightStrategyMax" = SIMPLEX_EDGE_WEIGHT_STRATEGY_STEEPEST_EDGE_UNIT_INITIAL + + cdef enum SimplexPriceStrategy: + SIMPLEX_PRICE_STRATEGY_MIN = 0 + SIMPLEX_PRICE_STRATEGY_COL = SIMPLEX_PRICE_STRATEGY_MIN + SIMPLEX_PRICE_STRATEGY_ROW + SIMPLEX_PRICE_STRATEGY_ROW_SWITCH + SIMPLEX_PRICE_STRATEGY_ROW_SWITCH_COL_SWITCH + SIMPLEX_PRICE_STRATEGY_MAX = SIMPLEX_PRICE_STRATEGY_ROW_SWITCH_COL_SWITCH + + cdef enum SimplexDualChuzcStrategy: + SIMPLEX_DUAL_CHUZC_STRATEGY_MIN = 0 + SIMPLEX_DUAL_CHUZC_STRATEGY_CHOOSE = SIMPLEX_DUAL_CHUZC_STRATEGY_MIN + SIMPLEX_DUAL_CHUZC_STRATEGY_QUAD + SIMPLEX_DUAL_CHUZC_STRATEGY_HEAP + SIMPLEX_DUAL_CHUZC_STRATEGY_BOTH + SIMPLEX_DUAL_CHUZC_STRATEGY_MAX = SIMPLEX_DUAL_CHUZC_STRATEGY_BOTH + + cdef enum InvertHint: + INVERT_HINT_NO = 0 + INVERT_HINT_UPDATE_LIMIT_REACHED + INVERT_HINT_SYNTHETIC_CLOCK_SAYS_INVERT + INVERT_HINT_POSSIBLY_OPTIMAL + INVERT_HINT_POSSIBLY_PRIMAL_UNBOUNDED + INVERT_HINT_POSSIBLY_DUAL_UNBOUNDED + INVERT_HINT_POSSIBLY_SINGULAR_BASIS + INVERT_HINT_PRIMAL_INFEASIBLE_IN_PRIMAL_SIMPLEX + INVERT_HINT_CHOOSE_COLUMN_FAIL + INVERT_HINT_Count + + cdef enum DualEdgeWeightMode: + DANTZIG "DualEdgeWeightMode::DANTZIG" = 0 + DEVEX "DualEdgeWeightMode::DEVEX" + STEEPEST_EDGE "DualEdgeWeightMode::STEEPEST_EDGE" + Count "DualEdgeWeightMode::Count" + + cdef enum 
PriceMode: + ROW "PriceMode::ROW" = 0 + COL "PriceMode::COL" + + const int PARALLEL_THREADS_DEFAULT + const int DUAL_TASKS_MIN_THREADS + const int DUAL_MULTI_MIN_THREADS + + const bool invert_if_row_out_negative + + const int NONBASIC_FLAG_TRUE + const int NONBASIC_FLAG_FALSE + + const int NONBASIC_MOVE_UP + const int NONBASIC_MOVE_DN + const int NONBASIC_MOVE_ZE diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/highs_c_api.pxd b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/highs_c_api.pxd new file mode 100644 index 0000000000000000000000000000000000000000..b7097caf30bcd298bd11f11fd8911f841eefbdde --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_highs/src/cython/highs_c_api.pxd @@ -0,0 +1,7 @@ +# cython: language_level=3 + +cdef extern from "highs_c_api.h" nogil: + int Highs_passLp(void* highs, int numcol, int numrow, int numnz, + double* colcost, double* collower, double* colupper, + double* rowlower, double* rowupper, + int* astart, int* aindex, double* avalue) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f60adcc891304e34ac9d85d108b6a232b4bf0c93 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__init__.py @@ -0,0 +1,5 @@ +"""This module contains least-squares algorithms.""" +from .least_squares import least_squares +from .lsq_linear import lsq_linear + +__all__ = ['least_squares', 'lsq_linear'] diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4708b3ea7f87b238c8e06a7f6ce3fb8205d75a9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/__init__.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/trf.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/trf.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f39d66a23e2d8e3038825e638f9c48aad4fdcec Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/__pycache__/trf.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/bvls.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/bvls.py new file mode 100644 index 0000000000000000000000000000000000000000..8f34ead4a1fc4edbb3c2ab50a204aa9a3cc21cff --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/bvls.py @@ -0,0 +1,183 @@ +"""Bounded-variable least-squares algorithm.""" +import numpy as np +from numpy.linalg import norm, lstsq +from 
scipy.optimize import OptimizeResult + +from .common import print_header_linear, print_iteration_linear + + +def compute_kkt_optimality(g, on_bound): + """Compute the maximum violation of KKT conditions.""" + g_kkt = g * on_bound + free_set = on_bound == 0 + g_kkt[free_set] = np.abs(g[free_set]) + return np.max(g_kkt) + + +def bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose, rcond=None): + m, n = A.shape + + x = x_lsq.copy() + on_bound = np.zeros(n) + + mask = x <= lb + x[mask] = lb[mask] + on_bound[mask] = -1 + + mask = x >= ub + x[mask] = ub[mask] + on_bound[mask] = 1 + + free_set = on_bound == 0 + active_set = ~free_set + free_set, = np.nonzero(free_set) + + r = A.dot(x) - b + cost = 0.5 * np.dot(r, r) + initial_cost = cost + g = A.T.dot(r) + + cost_change = None + step_norm = None + iteration = 0 + + if verbose == 2: + print_header_linear() + + # This is the initialization loop. The requirement is that the + # least-squares solution on free variables is feasible before BVLS starts. + # One possible initialization is to set all variables to lower or upper + # bounds, but many iterations may be required from this state later on. + # The implemented ad-hoc procedure which intuitively should give a better + # initial state: find the least-squares solution on current free variables, + # if its feasible then stop, otherwise, set violating variables to + # corresponding bounds and continue on the reduced set of free variables. + + while free_set.size > 0: + if verbose == 2: + optimality = compute_kkt_optimality(g, on_bound) + print_iteration_linear(iteration, cost, cost_change, step_norm, + optimality) + + iteration += 1 + x_free_old = x[free_set].copy() + + A_free = A[:, free_set] + b_free = b - A.dot(x * active_set) + z = lstsq(A_free, b_free, rcond=rcond)[0] + + lbv = z < lb[free_set] + ubv = z > ub[free_set] + v = lbv | ubv + + if np.any(lbv): + ind = free_set[lbv] + x[ind] = lb[ind] + active_set[ind] = True + on_bound[ind] = -1 + + if np.any(ubv): + ind = free_set[ubv] + x[ind] = ub[ind] + active_set[ind] = True + on_bound[ind] = 1 + + ind = free_set[~v] + x[ind] = z[~v] + + r = A.dot(x) - b + cost_new = 0.5 * np.dot(r, r) + cost_change = cost - cost_new + cost = cost_new + g = A.T.dot(r) + step_norm = norm(x[free_set] - x_free_old) + + if np.any(v): + free_set = free_set[~v] + else: + break + + if max_iter is None: + max_iter = n + max_iter += iteration + + termination_status = None + + # Main BVLS loop. 
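`compute_kkt_optimality` above encodes the KKT test compactly: for a variable at a bound, `g * on_bound` is positive exactly when the gradient pushes the variable further out of the feasible box, while free variables contribute `|g|`. A standalone check (the function body is copied from the listing):

```python
import numpy as np

def compute_kkt_optimality(g, on_bound):
    # Copied from the bvls listing above.
    g_kkt = g * on_bound
    free_set = on_bound == 0
    g_kkt[free_set] = np.abs(g[free_set])
    return np.max(g_kkt)

g = np.array([-2.0, 0.5, -0.1])
on_bound = np.array([-1.0, 0.0, 1.0])   # lower bound, free, upper bound
# The lower-bound variable has g < 0 (wants to move further down): violation 2.0.
print(compute_kkt_optimality(g, on_bound))   # 2.0
```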
+ + optimality = compute_kkt_optimality(g, on_bound) + for iteration in range(iteration, max_iter): # BVLS Loop A + if verbose == 2: + print_iteration_linear(iteration, cost, cost_change, + step_norm, optimality) + + if optimality < tol: + termination_status = 1 + + if termination_status is not None: + break + + move_to_free = np.argmax(g * on_bound) + on_bound[move_to_free] = 0 + + while True: # BVLS Loop B + + free_set = on_bound == 0 + active_set = ~free_set + free_set, = np.nonzero(free_set) + + x_free = x[free_set] + x_free_old = x_free.copy() + lb_free = lb[free_set] + ub_free = ub[free_set] + + A_free = A[:, free_set] + b_free = b - A.dot(x * active_set) + z = lstsq(A_free, b_free, rcond=rcond)[0] + + lbv, = np.nonzero(z < lb_free) + ubv, = np.nonzero(z > ub_free) + v = np.hstack((lbv, ubv)) + + if v.size > 0: + alphas = np.hstack(( + lb_free[lbv] - x_free[lbv], + ub_free[ubv] - x_free[ubv])) / (z[v] - x_free[v]) + + i = np.argmin(alphas) + i_free = v[i] + alpha = alphas[i] + + x_free *= 1 - alpha + x_free += alpha * z + x[free_set] = x_free + + if i < lbv.size: + on_bound[free_set[i_free]] = -1 + else: + on_bound[free_set[i_free]] = 1 + else: + x_free = z + x[free_set] = x_free + break + + step_norm = norm(x_free - x_free_old) + + r = A.dot(x) - b + cost_new = 0.5 * np.dot(r, r) + cost_change = cost - cost_new + + if cost_change < tol * cost: + termination_status = 2 + cost = cost_new + + g = A.T.dot(r) + optimality = compute_kkt_optimality(g, on_bound) + + if termination_status is None: + termination_status = 0 + + return OptimizeResult( + x=x, fun=r, cost=cost, optimality=optimality, active_mask=on_bound, + nit=iteration + 1, status=termination_status, + initial_cost=initial_cost) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/common.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/common.py new file mode 100644 index 0000000000000000000000000000000000000000..995c3b64ea64670463083ae41ba038f61338cdef --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/common.py @@ -0,0 +1,733 @@ +"""Functions used by least-squares algorithms.""" +from math import copysign + +import numpy as np +from numpy.linalg import norm + +from scipy.linalg import cho_factor, cho_solve, LinAlgError +from scipy.sparse import issparse +from scipy.sparse.linalg import LinearOperator, aslinearoperator + + +EPS = np.finfo(float).eps + + +# Functions related to a trust-region problem. + + +def intersect_trust_region(x, s, Delta): + """Find the intersection of a line with the boundary of a trust region. + + This function solves the quadratic equation with respect to t + ||(x + s*t)||**2 = Delta**2. + + Returns + ------- + t_neg, t_pos : tuple of float + Negative and positive roots. + + Raises + ------ + ValueError + If `s` is zero or `x` is not within the trust region. + """ + a = np.dot(s, s) + if a == 0: + raise ValueError("`s` is zero.") + + b = np.dot(x, s) + + c = np.dot(x, x) - Delta**2 + if c > 0: + raise ValueError("`x` is not within the trust region.") + + d = np.sqrt(b*b - a*c) # Root from one fourth of the discriminant. + + # Computations below avoid loss of significance, see "Numerical Recipes". 
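`bvls` is not meant to be called directly; it is the backend selected by `scipy.optimize.lsq_linear(method='bvls')`. A small bounded problem whose unconstrained solution is infeasible:

```python
import numpy as np
from scipy.optimize import lsq_linear

rng = np.random.default_rng(0)
A = rng.standard_normal((20, 3))
b = A @ np.array([0.5, -2.0, 3.0])   # unconstrained optimum has x[1] = -2

res = lsq_linear(A, b, bounds=([-1, -1, -1], [1, 1, 4]), method='bvls')
print(res.x)            # x[1] ends up pinned at its lower bound -1
print(res.active_mask)  # active_mask[1] == -1: lower bound active
```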
+ q = -(b + copysign(d, b)) + t1 = q / a + t2 = c / q + + if t1 < t2: + return t1, t2 + else: + return t2, t1 + + +def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None, + rtol=0.01, max_iter=10): + """Solve a trust-region problem arising in least-squares minimization. + + This function implements a method described by J. J. More [1]_ and used + in MINPACK, but it relies on a single SVD of Jacobian instead of series + of Cholesky decompositions. Before running this function, compute: + ``U, s, VT = svd(J, full_matrices=False)``. + + Parameters + ---------- + n : int + Number of variables. + m : int + Number of residuals. + uf : ndarray + Computed as U.T.dot(f). + s : ndarray + Singular values of J. + V : ndarray + Transpose of VT. + Delta : float + Radius of a trust region. + initial_alpha : float, optional + Initial guess for alpha, which might be available from a previous + iteration. If None, determined automatically. + rtol : float, optional + Stopping tolerance for the root-finding procedure. Namely, the + solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``. + max_iter : int, optional + Maximum allowed number of iterations for the root-finding procedure. + + Returns + ------- + p : ndarray, shape (n,) + Found solution of a trust-region problem. + alpha : float + Positive value such that (J.T*J + alpha*I)*p = -J.T*f. + Sometimes called Levenberg-Marquardt parameter. + n_iter : int + Number of iterations made by root-finding procedure. Zero means + that Gauss-Newton step was selected as the solution. + + References + ---------- + .. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation + and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes + in Mathematics 630, Springer Verlag, pp. 105-116, 1977. + """ + def phi_and_derivative(alpha, suf, s, Delta): + """Function of which to find zero. + + It is defined as "norm of regularized (by alpha) least-squares + solution minus `Delta`". Refer to [1]_. + """ + denom = s**2 + alpha + p_norm = norm(suf / denom) + phi = p_norm - Delta + phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm + return phi, phi_prime + + suf = s * uf + + # Check if J has full rank and try Gauss-Newton step. + if m >= n: + threshold = EPS * m * s[0] + full_rank = s[-1] > threshold + else: + full_rank = False + + if full_rank: + p = -V.dot(uf / s) + if norm(p) <= Delta: + return p, 0.0, 0 + + alpha_upper = norm(suf) / Delta + + if full_rank: + phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta) + alpha_lower = -phi / phi_prime + else: + alpha_lower = 0.0 + + if initial_alpha is None or not full_rank and initial_alpha == 0: + alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5) + else: + alpha = initial_alpha + + for it in range(max_iter): + if alpha < alpha_lower or alpha > alpha_upper: + alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5) + + phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta) + + if phi < 0: + alpha_upper = alpha + + ratio = phi / phi_prime + alpha_lower = max(alpha_lower, alpha - ratio) + alpha -= (phi + Delta) * ratio / Delta + + if np.abs(phi) < rtol * Delta: + break + + p = -V.dot(suf / (s**2 + alpha)) + + # Make the norm of p equal to Delta, p is changed only slightly during + # this. It is done to prevent p lie outside the trust region (which can + # cause problems later). + p *= Delta / norm(p) + + return p, alpha, it + 1 + + +def solve_trust_region_2d(B, g, Delta): + """Solve a general trust-region problem in 2 dimensions. 
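The root-finding in `solve_lsq_trust_region` targets exactly the `phi` defined in its inner helper. Because `V` has orthonormal columns, `||p(alpha)||` equals `||s * (U.T @ f) / (s**2 + alpha)||`, so `phi` can be evaluated from the SVD alone, and it is strictly decreasing in `alpha`, which is what makes the safeguarded iteration on `alpha` work. A sketch with a random Jacobian:

```python
import numpy as np

rng = np.random.default_rng(1)
J = rng.standard_normal((6, 3))
f = rng.standard_normal(6)
Delta = 0.5

U, s, VT = np.linalg.svd(J, full_matrices=False)
suf = s * (U.T @ f)

def phi(alpha):
    # ||p(alpha)|| - Delta, evaluated without forming p explicitly.
    return np.linalg.norm(suf / (s**2 + alpha)) - Delta

# Larger alpha regularizes harder and shrinks the step: phi decreases.
print(phi(0.0) > phi(1.0) > phi(10.0))   # True
```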
+ + The problem is reformulated as a 4th order algebraic equation, + the solution of which is found by numpy.roots. + + Parameters + ---------- + B : ndarray, shape (2, 2) + Symmetric matrix, defines a quadratic term of the function. + g : ndarray, shape (2,) + Defines a linear term of the function. + Delta : float + Radius of a trust region. + + Returns + ------- + p : ndarray, shape (2,) + Found solution. + newton_step : bool + Whether the returned solution is the Newton step which lies within + the trust region. + """ + try: + R, lower = cho_factor(B) + p = -cho_solve((R, lower), g) + if np.dot(p, p) <= Delta**2: + return p, True + except LinAlgError: + pass + + a = B[0, 0] * Delta**2 + b = B[0, 1] * Delta**2 + c = B[1, 1] * Delta**2 + + d = g[0] * Delta + f = g[1] * Delta + + coeffs = np.array( + [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d]) + t = np.roots(coeffs) # Can handle leading zeros. + t = np.real(t[np.isreal(t)]) + + p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2))) + value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p) + i = np.argmin(value) + p = p[:, i] + + return p, False + + +def update_tr_radius(Delta, actual_reduction, predicted_reduction, + step_norm, bound_hit): + """Update the radius of a trust region based on the cost reduction. + + Returns + ------- + Delta : float + New radius. + ratio : float + Ratio between actual and predicted reductions. + """ + if predicted_reduction > 0: + ratio = actual_reduction / predicted_reduction + elif predicted_reduction == actual_reduction == 0: + ratio = 1 + else: + ratio = 0 + + if ratio < 0.25: + Delta = 0.25 * step_norm + elif ratio > 0.75 and bound_hit: + Delta *= 2.0 + + return Delta, ratio + + +# Construction and minimization of quadratic functions. + + +def build_quadratic_1d(J, g, s, diag=None, s0=None): + """Parameterize a multivariate quadratic function along a line. + + The resulting univariate quadratic function is given as follows:: + + f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) + + g.T * (s0 + s*t) + + Parameters + ---------- + J : ndarray, sparse matrix or LinearOperator shape (m, n) + Jacobian matrix, affects the quadratic term. + g : ndarray, shape (n,) + Gradient, defines the linear term. + s : ndarray, shape (n,) + Direction vector of a line. + diag : None or ndarray with shape (n,), optional + Addition diagonal part, affects the quadratic term. + If None, assumed to be 0. + s0 : None or ndarray with shape (n,), optional + Initial point. If None, assumed to be 0. + + Returns + ------- + a : float + Coefficient for t**2. + b : float + Coefficient for t. + c : float + Free term. Returned only if `s0` is provided. + """ + v = J.dot(s) + a = np.dot(v, v) + if diag is not None: + a += np.dot(s * diag, s) + a *= 0.5 + + b = np.dot(g, s) + + if s0 is not None: + u = J.dot(s0) + b += np.dot(u, v) + c = 0.5 * np.dot(u, u) + np.dot(g, s0) + if diag is not None: + b += np.dot(s0 * diag, s) + c += 0.5 * np.dot(s0 * diag, s0) + return a, b, c + else: + return a, b + + +def minimize_quadratic_1d(a, b, lb, ub, c=0): + """Minimize a 1-D quadratic function subject to bounds. + + The free term `c` is 0 by default. Bounds must be finite. + + Returns + ------- + t : float + Minimum point. + y : float + Minimum value. 
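A quick consistency check for `build_quadratic_1d` above: with `diag=None`, expanding `0.5*||J(s0 + t*s)||**2 + g.T(s0 + t*s)` gives `a = 0.5*||J s||**2`, `b = g.s + (J s0).(J s)`, and `c = 0.5*||J s0||**2 + g.s0`, matching the coefficients the function returns:

```python
import numpy as np

rng = np.random.default_rng(2)
J = rng.standard_normal((5, 3))
g = rng.standard_normal(3)
s = rng.standard_normal(3)
s0 = rng.standard_normal(3)

v, u = J @ s, J @ s0
a = 0.5 * (v @ v)             # coefficient of t**2
b = g @ s + u @ v             # coefficient of t
c = 0.5 * (u @ u) + g @ s0    # free term

t = 0.7
direct = 0.5 * np.linalg.norm(J @ (s0 + t * s))**2 + g @ (s0 + t * s)
print(np.isclose(a * t**2 + b * t + c, direct))   # True
```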
+ """ + t = [lb, ub] + if a != 0: + extremum = -0.5 * b / a + if lb < extremum < ub: + t.append(extremum) + t = np.asarray(t) + y = t * (a * t + b) + c + min_index = np.argmin(y) + return t[min_index], y[min_index] + + +def evaluate_quadratic(J, g, s, diag=None): + """Compute values of a quadratic function arising in least squares. + + The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s. + + Parameters + ---------- + J : ndarray, sparse matrix or LinearOperator, shape (m, n) + Jacobian matrix, affects the quadratic term. + g : ndarray, shape (n,) + Gradient, defines the linear term. + s : ndarray, shape (k, n) or (n,) + Array containing steps as rows. + diag : ndarray, shape (n,), optional + Addition diagonal part, affects the quadratic term. + If None, assumed to be 0. + + Returns + ------- + values : ndarray with shape (k,) or float + Values of the function. If `s` was 2-D, then ndarray is + returned, otherwise, float is returned. + """ + if s.ndim == 1: + Js = J.dot(s) + q = np.dot(Js, Js) + if diag is not None: + q += np.dot(s * diag, s) + else: + Js = J.dot(s.T) + q = np.sum(Js**2, axis=0) + if diag is not None: + q += np.sum(diag * s**2, axis=1) + + l = np.dot(s, g) + + return 0.5 * q + l + + +# Utility functions to work with bound constraints. + + +def in_bounds(x, lb, ub): + """Check if a point lies within bounds.""" + return np.all((x >= lb) & (x <= ub)) + + +def step_size_to_bound(x, s, lb, ub): + """Compute a min_step size required to reach a bound. + + The function computes a positive scalar t, such that x + s * t is on + the bound. + + Returns + ------- + step : float + Computed step. Non-negative value. + hits : ndarray of int with shape of x + Each element indicates whether a corresponding variable reaches the + bound: + + * 0 - the bound was not hit. + * -1 - the lower bound was hit. + * 1 - the upper bound was hit. + """ + non_zero = np.nonzero(s) + s_non_zero = s[non_zero] + steps = np.empty_like(x) + steps.fill(np.inf) + with np.errstate(over='ignore'): + steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero, + (ub - x)[non_zero] / s_non_zero) + min_step = np.min(steps) + return min_step, np.equal(steps, min_step) * np.sign(s).astype(int) + + +def find_active_constraints(x, lb, ub, rtol=1e-10): + """Determine which constraints are active in a given point. + + The threshold is computed using `rtol` and the absolute value of the + closest bound. + + Returns + ------- + active : ndarray of int with shape of x + Each component shows whether the corresponding constraint is active: + + * 0 - a constraint is not active. + * -1 - a lower bound is active. + * 1 - a upper bound is active. + """ + active = np.zeros_like(x, dtype=int) + + if rtol == 0: + active[x <= lb] = -1 + active[x >= ub] = 1 + return active + + lower_dist = x - lb + upper_dist = ub - x + + lower_threshold = rtol * np.maximum(1, np.abs(lb)) + upper_threshold = rtol * np.maximum(1, np.abs(ub)) + + lower_active = (np.isfinite(lb) & + (lower_dist <= np.minimum(upper_dist, lower_threshold))) + active[lower_active] = -1 + + upper_active = (np.isfinite(ub) & + (upper_dist <= np.minimum(lower_dist, upper_threshold))) + active[upper_active] = 1 + + return active + + +def make_strictly_feasible(x, lb, ub, rstep=1e-10): + """Shift a point to the interior of a feasible region. + + Each element of the returned vector is at least at a relative distance + `rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used. 
+ """ + x_new = x.copy() + + active = find_active_constraints(x, lb, ub, rstep) + lower_mask = np.equal(active, -1) + upper_mask = np.equal(active, 1) + + if rstep == 0: + x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask]) + x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask]) + else: + x_new[lower_mask] = (lb[lower_mask] + + rstep * np.maximum(1, np.abs(lb[lower_mask]))) + x_new[upper_mask] = (ub[upper_mask] - + rstep * np.maximum(1, np.abs(ub[upper_mask]))) + + tight_bounds = (x_new < lb) | (x_new > ub) + x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds]) + + return x_new + + +def CL_scaling_vector(x, g, lb, ub): + """Compute Coleman-Li scaling vector and its derivatives. + + Components of a vector v are defined as follows:: + + | ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf + v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf + | 1, otherwise + + According to this definition v[i] >= 0 for all i. It differs from the + definition in paper [1]_ (eq. (2.2)), where the absolute value of v is + used. Both definitions are equivalent down the line. + Derivatives of v with respect to x take value 1, -1 or 0 depending on a + case. + + Returns + ------- + v : ndarray with shape of x + Scaling vector. + dv : ndarray with shape of x + Derivatives of v[i] with respect to x[i], diagonal elements of v's + Jacobian. + + References + ---------- + .. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior, + and Conjugate Gradient Method for Large-Scale Bound-Constrained + Minimization Problems," SIAM Journal on Scientific Computing, + Vol. 21, Number 1, pp 1-23, 1999. + """ + v = np.ones_like(x) + dv = np.zeros_like(x) + + mask = (g < 0) & np.isfinite(ub) + v[mask] = ub[mask] - x[mask] + dv[mask] = -1 + + mask = (g > 0) & np.isfinite(lb) + v[mask] = x[mask] - lb[mask] + dv[mask] = 1 + + return v, dv + + +def reflective_transformation(y, lb, ub): + """Compute reflective transformation and its gradient.""" + if in_bounds(y, lb, ub): + return y, np.ones_like(y) + + lb_finite = np.isfinite(lb) + ub_finite = np.isfinite(ub) + + x = y.copy() + g_negative = np.zeros_like(y, dtype=bool) + + mask = lb_finite & ~ub_finite + x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask]) + g_negative[mask] = y[mask] < lb[mask] + + mask = ~lb_finite & ub_finite + x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask]) + g_negative[mask] = y[mask] > ub[mask] + + mask = lb_finite & ub_finite + d = ub - lb + t = np.remainder(y[mask] - lb[mask], 2 * d[mask]) + x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t) + g_negative[mask] = t > d[mask] + + g = np.ones_like(y) + g[g_negative] = -1 + + return x, g + + +# Functions to display algorithm's progress. 
+ + +def print_header_nonlinear(): + print("{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}" + .format("Iteration", "Total nfev", "Cost", "Cost reduction", + "Step norm", "Optimality")) + + +def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction, + step_norm, optimality): + if cost_reduction is None: + cost_reduction = " " * 15 + else: + cost_reduction = f"{cost_reduction:^15.2e}" + + if step_norm is None: + step_norm = " " * 15 + else: + step_norm = f"{step_norm:^15.2e}" + + print("{:^15}{:^15}{:^15.4e}{}{}{:^15.2e}" + .format(iteration, nfev, cost, cost_reduction, + step_norm, optimality)) + + +def print_header_linear(): + print("{:^15}{:^15}{:^15}{:^15}{:^15}" + .format("Iteration", "Cost", "Cost reduction", "Step norm", + "Optimality")) + + +def print_iteration_linear(iteration, cost, cost_reduction, step_norm, + optimality): + if cost_reduction is None: + cost_reduction = " " * 15 + else: + cost_reduction = f"{cost_reduction:^15.2e}" + + if step_norm is None: + step_norm = " " * 15 + else: + step_norm = f"{step_norm:^15.2e}" + + print(f"{iteration:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}") + + +# Simple helper functions. + + +def compute_grad(J, f): + """Compute gradient of the least-squares cost function.""" + if isinstance(J, LinearOperator): + return J.rmatvec(f) + else: + return J.T.dot(f) + + +def compute_jac_scale(J, scale_inv_old=None): + """Compute variables scale based on the Jacobian matrix.""" + if issparse(J): + scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5 + else: + scale_inv = np.sum(J**2, axis=0)**0.5 + + if scale_inv_old is None: + scale_inv[scale_inv == 0] = 1 + else: + scale_inv = np.maximum(scale_inv, scale_inv_old) + + return 1 / scale_inv, scale_inv + + +def left_multiplied_operator(J, d): + """Return diag(d) J as LinearOperator.""" + J = aslinearoperator(J) + + def matvec(x): + return d * J.matvec(x) + + def matmat(X): + return d[:, np.newaxis] * J.matmat(X) + + def rmatvec(x): + return J.rmatvec(x.ravel() * d) + + return LinearOperator(J.shape, matvec=matvec, matmat=matmat, + rmatvec=rmatvec) + + +def right_multiplied_operator(J, d): + """Return J diag(d) as LinearOperator.""" + J = aslinearoperator(J) + + def matvec(x): + return J.matvec(np.ravel(x) * d) + + def matmat(X): + return J.matmat(X * d[:, np.newaxis]) + + def rmatvec(x): + return d * J.rmatvec(x) + + return LinearOperator(J.shape, matvec=matvec, matmat=matmat, + rmatvec=rmatvec) + + +def regularized_lsq_operator(J, diag): + """Return a matrix arising in regularized least squares as LinearOperator. + + The matrix is + [ J ] + [ D ] + where D is diagonal matrix with elements from `diag`. + """ + J = aslinearoperator(J) + m, n = J.shape + + def matvec(x): + return np.hstack((J.matvec(x), diag * x)) + + def rmatvec(x): + x1 = x[:m] + x2 = x[m:] + return J.rmatvec(x1) + diag * x2 + + return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec) + + +def right_multiply(J, d, copy=True): + """Compute J diag(d). + + If `copy` is False, `J` is modified in place (unless being LinearOperator). + """ + if copy and not isinstance(J, LinearOperator): + J = J.copy() + + if issparse(J): + J.data *= d.take(J.indices, mode='clip') # scikit-learn recipe. + elif isinstance(J, LinearOperator): + J = right_multiplied_operator(J, d) + else: + J *= d + + return J + + +def left_multiply(J, d, copy=True): + """Compute diag(d) J. + + If `copy` is False, `J` is modified in place (unless being LinearOperator). 
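`left_multiplied_operator` above builds `diag(d) @ J` without materializing it; its `matvec`/`rmatvec` must agree with the dense product. A sanity check (the helper is copied from the listing, with `matmat` omitted for brevity):

```python
import numpy as np
from scipy.sparse.linalg import LinearOperator, aslinearoperator

def left_multiplied_operator(J, d):
    # Copied from the common.py listing above (matmat omitted).
    J = aslinearoperator(J)
    return LinearOperator(J.shape,
                          matvec=lambda x: d * J.matvec(x),
                          rmatvec=lambda x: J.rmatvec(x.ravel() * d))

rng = np.random.default_rng(3)
J = rng.standard_normal((4, 3))
d = rng.standard_normal(4)
x = rng.standard_normal(3)

op = left_multiplied_operator(J, d)
print(np.allclose(op.matvec(x), np.diag(d) @ J @ x))        # True
print(np.allclose(op.rmatvec(d), (np.diag(d) @ J).T @ d))   # True
```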
+ """ + if copy and not isinstance(J, LinearOperator): + J = J.copy() + + if issparse(J): + J.data *= np.repeat(d, np.diff(J.indptr)) # scikit-learn recipe. + elif isinstance(J, LinearOperator): + J = left_multiplied_operator(J, d) + else: + J *= d[:, np.newaxis] + + return J + + +def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol): + """Check termination condition for nonlinear least squares.""" + ftol_satisfied = dF < ftol * F and ratio > 0.25 + xtol_satisfied = dx_norm < xtol * (xtol + x_norm) + + if ftol_satisfied and xtol_satisfied: + return 4 + elif ftol_satisfied: + return 2 + elif xtol_satisfied: + return 3 + else: + return None + + +def scale_for_robust_loss_function(J, f, rho): + """Scale Jacobian and residuals for a robust loss function. + + Arrays are modified in place. + """ + J_scale = rho[1] + 2 * rho[2] * f**2 + J_scale[J_scale < EPS] = EPS + J_scale **= 0.5 + + f *= rho[1] / J_scale + + return left_multiply(J, J_scale, copy=False), f diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/dogbox.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/dogbox.py new file mode 100644 index 0000000000000000000000000000000000000000..6bb5abbe79028afed7b110603a0d5dfd6affae7f --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/dogbox.py @@ -0,0 +1,331 @@ +""" +Dogleg algorithm with rectangular trust regions for least-squares minimization. + +The description of the algorithm can be found in [Voglis]_. The algorithm does +trust-region iterations, but the shape of trust regions is rectangular as +opposed to conventional elliptical. The intersection of a trust region and +an initial feasible region is again some rectangle. Thus, on each iteration a +bound-constrained quadratic optimization problem is solved. + +A quadratic problem is solved by well-known dogleg approach, where the +function is minimized along piecewise-linear "dogleg" path [NumOpt]_, +Chapter 4. If Jacobian is not rank-deficient then the function is decreasing +along this path, and optimization amounts to simply following along this +path as long as a point stays within the bounds. A constrained Cauchy step +(along the anti-gradient) is considered for safety in rank deficient cases, +in this situations the convergence might be slow. + +If during iterations some variable hit the initial bound and the component +of anti-gradient points outside the feasible region, then a next dogleg step +won't make any progress. At this state such variables satisfy first-order +optimality conditions and they are excluded before computing a next dogleg +step. + +Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense +Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for +dense and sparse matrices, or Jacobian being LinearOperator). The second +option allows to solve very large problems (up to couple of millions of +residuals on a regular PC), provided the Jacobian matrix is sufficiently +sparse. But note that dogbox is not very good for solving problems with +large number of constraints, because of variables exclusion-inclusion on each +iteration (a required number of function evaluations might be high or accuracy +of a solution will be poor), thus its large-scale usage is probably limited +to unconstrained problems. + +References +---------- +.. [Voglis] C. Voglis and I. E. 
Lagaris, "A Rectangular Trust Region Dogleg + Approach for Unconstrained and Bound Constrained Nonlinear + Optimization", WSEAS International Conference on Applied + Mathematics, Corfu, Greece, 2004. +.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition". +""" +import numpy as np +from numpy.linalg import lstsq, norm + +from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr +from scipy.optimize import OptimizeResult + +from .common import ( + step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic, + build_quadratic_1d, minimize_quadratic_1d, compute_grad, + compute_jac_scale, check_termination, scale_for_robust_loss_function, + print_header_nonlinear, print_iteration_nonlinear) + + +def lsmr_operator(Jop, d, active_set): + """Compute LinearOperator to use in LSMR by dogbox algorithm. + + `active_set` mask is used to excluded active variables from computations + of matrix-vector products. + """ + m, n = Jop.shape + + def matvec(x): + x_free = x.ravel().copy() + x_free[active_set] = 0 + return Jop.matvec(x * d) + + def rmatvec(x): + r = d * Jop.rmatvec(x) + r[active_set] = 0 + return r + + return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float) + + +def find_intersection(x, tr_bounds, lb, ub): + """Find intersection of trust-region bounds and initial bounds. + + Returns + ------- + lb_total, ub_total : ndarray with shape of x + Lower and upper bounds of the intersection region. + orig_l, orig_u : ndarray of bool with shape of x + True means that an original bound is taken as a corresponding bound + in the intersection region. + tr_l, tr_u : ndarray of bool with shape of x + True means that a trust-region bound is taken as a corresponding bound + in the intersection region. + """ + lb_centered = lb - x + ub_centered = ub - x + + lb_total = np.maximum(lb_centered, -tr_bounds) + ub_total = np.minimum(ub_centered, tr_bounds) + + orig_l = np.equal(lb_total, lb_centered) + orig_u = np.equal(ub_total, ub_centered) + + tr_l = np.equal(lb_total, -tr_bounds) + tr_u = np.equal(ub_total, tr_bounds) + + return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u + + +def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub): + """Find dogleg step in a rectangular region. + + Returns + ------- + step : ndarray, shape (n,) + Computed dogleg step. + bound_hits : ndarray of int, shape (n,) + Each component shows whether a corresponding variable hits the + initial bound after the step is taken: + * 0 - a variable doesn't hit the bound. + * -1 - lower bound is hit. + * 1 - upper bound is hit. + tr_hit : bool + Whether the step hit the boundary of the trust-region. + """ + lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection( + x, tr_bounds, lb, ub + ) + bound_hits = np.zeros_like(x, dtype=int) + + if in_bounds(newton_step, lb_total, ub_total): + return newton_step, bound_hits, False + + to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total) + + # The classical dogleg algorithm would check if Cauchy step fits into + # the bounds, and just return it constrained version if not. But in a + # rectangular trust region it makes sense to try to improve constrained + # Cauchy step too. Thus, we don't distinguish these two cases. 
+ + cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g + + step_diff = newton_step - cauchy_step + step_size, hits = step_size_to_bound(cauchy_step, step_diff, + lb_total, ub_total) + bound_hits[(hits < 0) & orig_l] = -1 + bound_hits[(hits > 0) & orig_u] = 1 + tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u) + + return cauchy_step + step_size * step_diff, bound_hits, tr_hit + + +def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, + loss_function, tr_solver, tr_options, verbose): + f = f0 + f_true = f.copy() + nfev = 1 + + J = J0 + njev = 1 + + if loss_function is not None: + rho = loss_function(f) + cost = 0.5 * np.sum(rho[0]) + J, f = scale_for_robust_loss_function(J, f, rho) + else: + cost = 0.5 * np.dot(f, f) + + g = compute_grad(J, f) + + jac_scale = isinstance(x_scale, str) and x_scale == 'jac' + if jac_scale: + scale, scale_inv = compute_jac_scale(J) + else: + scale, scale_inv = x_scale, 1 / x_scale + + Delta = norm(x0 * scale_inv, ord=np.inf) + if Delta == 0: + Delta = 1.0 + + on_bound = np.zeros_like(x0, dtype=int) + on_bound[np.equal(x0, lb)] = -1 + on_bound[np.equal(x0, ub)] = 1 + + x = x0 + step = np.empty_like(x0) + + if max_nfev is None: + max_nfev = x0.size * 100 + + termination_status = None + iteration = 0 + step_norm = None + actual_reduction = None + + if verbose == 2: + print_header_nonlinear() + + while True: + active_set = on_bound * g < 0 + free_set = ~active_set + + g_free = g[free_set] + g_full = g.copy() + g[active_set] = 0 + + g_norm = norm(g, ord=np.inf) + if g_norm < gtol: + termination_status = 1 + + if verbose == 2: + print_iteration_nonlinear(iteration, nfev, cost, actual_reduction, + step_norm, g_norm) + + if termination_status is not None or nfev == max_nfev: + break + + x_free = x[free_set] + lb_free = lb[free_set] + ub_free = ub[free_set] + scale_free = scale[free_set] + + # Compute (Gauss-)Newton and build quadratic model for Cauchy step. + if tr_solver == 'exact': + J_free = J[:, free_set] + newton_step = lstsq(J_free, -f, rcond=-1)[0] + + # Coefficients for the quadratic model along the anti-gradient. + a, b = build_quadratic_1d(J_free, g_free, -g_free) + elif tr_solver == 'lsmr': + Jop = aslinearoperator(J) + + # We compute lsmr step in scaled variables and then + # transform back to normal variables, if lsmr would give exact lsq + # solution, this would be equivalent to not doing any + # transformations, but from experience it's better this way. + + # We pass active_set to make computations as if we selected + # the free subset of J columns, but without actually doing any + # slicing, which is expensive for sparse matrices and impossible + # for LinearOperator. + + lsmr_op = lsmr_operator(Jop, scale, active_set) + newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set] + newton_step *= scale_free + + # Components of g for active variables were zeroed, so this call + # is correct and equivalent to using J_free and g_free. + a, b = build_quadratic_1d(Jop, g, -g) + + actual_reduction = -1.0 + while actual_reduction <= 0 and nfev < max_nfev: + tr_bounds = Delta * scale_free + + step_free, on_bound_free, tr_hit = dogleg_step( + x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free) + + step.fill(0.0) + step[free_set] = step_free + + if tr_solver == 'exact': + predicted_reduction = -evaluate_quadratic(J_free, g_free, + step_free) + elif tr_solver == 'lsmr': + predicted_reduction = -evaluate_quadratic(Jop, g, step) + + # gh11403 ensure that solution is fully within bounds. 
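The composition in `dogleg_step` above, in miniature: start from the constrained Cauchy point, then move toward the Newton step until either reaching it or hitting the first face of the intersection box. The numbers below are made up to show the clipping:

```python
import numpy as np

cauchy_step = np.array([0.2, 0.2])
newton_step = np.array([1.0, 0.1])
lb_total = np.array([-0.5, -0.5])
ub_total = np.array([0.5, 0.5])

diff = newton_step - cauchy_step
# Largest t in [0, 1] keeping cauchy_step + t*diff inside the box
# (diff has no zero entries in this toy example).
t_hit = np.where(diff > 0,
                 (ub_total - cauchy_step) / diff,
                 (lb_total - cauchy_step) / diff)
t = min(1.0, t_hit.min())
print(t, cauchy_step + t * diff)   # 0.375 [0.5 0.1625]: stopped at a face
```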
+ x_new = np.clip(x + step, lb, ub) + + f_new = fun(x_new) + nfev += 1 + + step_h_norm = norm(step * scale_inv, ord=np.inf) + + if not np.all(np.isfinite(f_new)): + Delta = 0.25 * step_h_norm + continue + + # Usual trust-region step quality estimation. + if loss_function is not None: + cost_new = loss_function(f_new, cost_only=True) + else: + cost_new = 0.5 * np.dot(f_new, f_new) + actual_reduction = cost - cost_new + + Delta, ratio = update_tr_radius( + Delta, actual_reduction, predicted_reduction, + step_h_norm, tr_hit + ) + + step_norm = norm(step) + termination_status = check_termination( + actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol) + + if termination_status is not None: + break + + if actual_reduction > 0: + on_bound[free_set] = on_bound_free + + x = x_new + # Set variables exactly at the boundary. + mask = on_bound == -1 + x[mask] = lb[mask] + mask = on_bound == 1 + x[mask] = ub[mask] + + f = f_new + f_true = f.copy() + + cost = cost_new + + J = jac(x, f) + njev += 1 + + if loss_function is not None: + rho = loss_function(f) + J, f = scale_for_robust_loss_function(J, f, rho) + + g = compute_grad(J, f) + + if jac_scale: + scale, scale_inv = compute_jac_scale(J, scale_inv) + else: + step_norm = 0 + actual_reduction = 0 + + iteration += 1 + + if termination_status is None: + termination_status = 0 + + return OptimizeResult( + x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm, + active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/least_squares.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/least_squares.py new file mode 100644 index 0000000000000000000000000000000000000000..db8bb31c7b1530fd48ac7ae58cf501e2b0081a91 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/least_squares.py @@ -0,0 +1,967 @@ +"""Generic interface for least-squares minimization.""" +from warnings import warn + +import numpy as np +from numpy.linalg import norm + +from scipy.sparse import issparse +from scipy.sparse.linalg import LinearOperator +from scipy.optimize import _minpack, OptimizeResult +from scipy.optimize._numdiff import approx_derivative, group_columns +from scipy.optimize._minimize import Bounds + +from .trf import trf +from .dogbox import dogbox +from .common import EPS, in_bounds, make_strictly_feasible + + +TERMINATION_MESSAGES = { + -1: "Improper input parameters status returned from `leastsq`", + 0: "The maximum number of function evaluations is exceeded.", + 1: "`gtol` termination condition is satisfied.", + 2: "`ftol` termination condition is satisfied.", + 3: "`xtol` termination condition is satisfied.", + 4: "Both `ftol` and `xtol` termination conditions are satisfied." +} + + +FROM_MINPACK_TO_COMMON = { + 0: -1, # Improper input parameters from MINPACK. + 1: 2, + 2: 3, + 3: 4, + 4: 1, + 5: 0 + # There are 6, 7, 8 for too small tolerance parameters, + # but we guard against it by checking ftol, xtol, gtol beforehand. +} + + +def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step): + n = x0.size + + if diff_step is None: + epsfcn = EPS + else: + epsfcn = diff_step**2 + + # Compute MINPACK's `diag`, which is inverse of our `x_scale` and + # ``x_scale='jac'`` corresponds to ``diag=None``. 
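The `dogbox` driver above is reached through the public API as `scipy.optimize.least_squares(..., method='dogbox')`; the integer statuses it returns are rendered through `TERMINATION_MESSAGES` below. A bounded Rosenbrock fit routed through it:

```python
import numpy as np
from scipy.optimize import least_squares

def residuals(x):
    return np.array([10 * (x[1] - x[0]**2), 1 - x[0]])

res = least_squares(residuals, x0=np.array([2.0, 2.0]),
                    bounds=([-np.inf, 1.5], np.inf), method='dogbox')
print(res.x)        # expected near [1.22, 1.5], on the bound x[1] = 1.5
print(res.status, res.message)
```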
+ if isinstance(x_scale, str) and x_scale == 'jac': + diag = None + else: + diag = 1 / x_scale + + full_output = True + col_deriv = False + factor = 100.0 + + if jac is None: + if max_nfev is None: + # n squared to account for Jacobian evaluations. + max_nfev = 100 * n * (n + 1) + x, info, status = _minpack._lmdif( + fun, x0, (), full_output, ftol, xtol, gtol, + max_nfev, epsfcn, factor, diag) + else: + if max_nfev is None: + max_nfev = 100 * n + x, info, status = _minpack._lmder( + fun, jac, x0, (), full_output, col_deriv, + ftol, xtol, gtol, max_nfev, factor, diag) + + f = info['fvec'] + + if callable(jac): + J = jac(x) + else: + J = np.atleast_2d(approx_derivative(fun, x)) + + cost = 0.5 * np.dot(f, f) + g = J.T.dot(f) + g_norm = norm(g, ord=np.inf) + + nfev = info['nfev'] + njev = info.get('njev', None) + + status = FROM_MINPACK_TO_COMMON[status] + active_mask = np.zeros_like(x0, dtype=int) + + return OptimizeResult( + x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm, + active_mask=active_mask, nfev=nfev, njev=njev, status=status) + + +def prepare_bounds(bounds, n): + lb, ub = (np.asarray(b, dtype=float) for b in bounds) + if lb.ndim == 0: + lb = np.resize(lb, n) + + if ub.ndim == 0: + ub = np.resize(ub, n) + + return lb, ub + + +def check_tolerance(ftol, xtol, gtol, method): + def check(tol, name): + if tol is None: + tol = 0 + elif tol < EPS: + warn(f"Setting `{name}` below the machine epsilon ({EPS:.2e}) effectively " + f"disables the corresponding termination condition.", + stacklevel=3) + return tol + + ftol = check(ftol, "ftol") + xtol = check(xtol, "xtol") + gtol = check(gtol, "gtol") + + if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS): + raise ValueError("All tolerances must be higher than machine epsilon " + f"({EPS:.2e}) for method 'lm'.") + elif ftol < EPS and xtol < EPS and gtol < EPS: + raise ValueError("At least one of the tolerances must be higher than " + f"machine epsilon ({EPS:.2e}).") + + return ftol, xtol, gtol + + +def check_x_scale(x_scale, x0): + if isinstance(x_scale, str) and x_scale == 'jac': + return x_scale + + try: + x_scale = np.asarray(x_scale, dtype=float) + valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0) + except (ValueError, TypeError): + valid = False + + if not valid: + raise ValueError("`x_scale` must be 'jac' or array_like with " + "positive numbers.") + + if x_scale.ndim == 0: + x_scale = np.resize(x_scale, x0.shape) + + if x_scale.shape != x0.shape: + raise ValueError("Inconsistent shapes between `x_scale` and `x0`.") + + return x_scale + + +def check_jac_sparsity(jac_sparsity, m, n): + if jac_sparsity is None: + return None + + if not issparse(jac_sparsity): + jac_sparsity = np.atleast_2d(jac_sparsity) + + if jac_sparsity.shape != (m, n): + raise ValueError("`jac_sparsity` has wrong shape.") + + return jac_sparsity, group_columns(jac_sparsity) + + +# Loss functions. 
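The Huber rho implemented just below is the identity for `z <= 1` and switches to `2*sqrt(z) - 1` beyond, with the first derivative continuous at the seam. Its value and derivative rows, computed directly:

```python
import numpy as np

z = np.array([0.25, 1.0, 4.0])
rho0 = np.where(z <= 1, z, 2 * z**0.5 - 1)   # loss values
rho1 = np.where(z <= 1, 1.0, z**-0.5)        # first derivatives
print(rho0)   # [0.25 1.   3.  ]
print(rho1)   # [1.  1.  0.5]  -> equals 1.0 at z = 1 from both sides
```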
+ + +def huber(z, rho, cost_only): + mask = z <= 1 + rho[0, mask] = z[mask] + rho[0, ~mask] = 2 * z[~mask]**0.5 - 1 + if cost_only: + return + rho[1, mask] = 1 + rho[1, ~mask] = z[~mask]**-0.5 + rho[2, mask] = 0 + rho[2, ~mask] = -0.5 * z[~mask]**-1.5 + + +def soft_l1(z, rho, cost_only): + t = 1 + z + rho[0] = 2 * (t**0.5 - 1) + if cost_only: + return + rho[1] = t**-0.5 + rho[2] = -0.5 * t**-1.5 + + +def cauchy(z, rho, cost_only): + rho[0] = np.log1p(z) + if cost_only: + return + t = 1 + z + rho[1] = 1 / t + rho[2] = -1 / t**2 + + +def arctan(z, rho, cost_only): + rho[0] = np.arctan(z) + if cost_only: + return + t = 1 + z**2 + rho[1] = 1 / t + rho[2] = -2 * z / t**2 + + +IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1, + cauchy=cauchy, arctan=arctan) + + +def construct_loss_function(m, loss, f_scale): + if loss == 'linear': + return None + + if not callable(loss): + loss = IMPLEMENTED_LOSSES[loss] + rho = np.empty((3, m)) + + def loss_function(f, cost_only=False): + z = (f / f_scale) ** 2 + loss(z, rho, cost_only=cost_only) + if cost_only: + return 0.5 * f_scale ** 2 * np.sum(rho[0]) + rho[0] *= f_scale ** 2 + rho[2] /= f_scale ** 2 + return rho + else: + def loss_function(f, cost_only=False): + z = (f / f_scale) ** 2 + rho = loss(z) + if cost_only: + return 0.5 * f_scale ** 2 * np.sum(rho[0]) + rho[0] *= f_scale ** 2 + rho[2] /= f_scale ** 2 + return rho + + return loss_function + + +def least_squares( + fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf', + ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear', + f_scale=1.0, diff_step=None, tr_solver=None, tr_options={}, + jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}): + """Solve a nonlinear least-squares problem with bounds on the variables. + + Given the residuals f(x) (an m-D real function of n real + variables) and the loss function rho(s) (a scalar function), `least_squares` + finds a local minimum of the cost function F(x):: + + minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1) + subject to lb <= x <= ub + + The purpose of the loss function rho(s) is to reduce the influence of + outliers on the solution. + + Parameters + ---------- + fun : callable + Function which computes the vector of residuals, with the signature + ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with + respect to its first argument. The argument ``x`` passed to this + function is an ndarray of shape (n,) (never a scalar, even for n=1). + It must allocate and return a 1-D array_like of shape (m,) or a scalar. + If the argument ``x`` is complex or the function ``fun`` returns + complex residuals, it must be wrapped in a real function of real + arguments, as shown at the end of the Examples section. + x0 : array_like with shape (n,) or float + Initial guess on independent variables. If float, it will be treated + as a 1-D array with one element. When `method` is 'trf', the initial + guess might be slightly adjusted to lie sufficiently within the given + `bounds`. + jac : {'2-point', '3-point', 'cs', callable}, optional + Method of computing the Jacobian matrix (an m-by-n matrix, where + element (i, j) is the partial derivative of f[i] with respect to + x[j]). The keywords select a finite difference scheme for numerical + estimation. The scheme '3-point' is more accurate, but requires + twice as many operations as '2-point' (default). 
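`construct_loss_function` above also accepts a user callable, which must take `z = f**2` and return the `(3, m)` stack of `rho(z)`, `rho'(z)`, `rho''(z)`. A hand-rolled soft_l1 passed through the public entry point (the one-variable residual below is arbitrary):

```python
import numpy as np
from scipy.optimize import least_squares

def my_soft_l1(z):
    t = 1 + z
    return np.array([2 * (t**0.5 - 1),   # rho(z)
                     t**-0.5,            # rho'(z)
                     -0.5 * t**-1.5])    # rho''(z)

res = least_squares(lambda x: np.array([x[0] - 1.0, 2.0 * x[0]]),
                    x0=[0.0], loss=my_soft_l1, f_scale=0.5)
print(res.x, res.status)
```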
The scheme 'cs' + uses complex steps, and while potentially the most accurate, it is + applicable only when `fun` correctly handles complex inputs and + can be analytically continued to the complex plane. Method 'lm' + always uses the '2-point' scheme. If callable, it is used as + ``jac(x, *args, **kwargs)`` and should return a good approximation + (or the exact value) for the Jacobian as an array_like (np.atleast_2d + is applied), a sparse matrix (csr_matrix preferred for performance) or + a `scipy.sparse.linalg.LinearOperator`. + bounds : 2-tuple of array_like or `Bounds`, optional + There are two ways to specify bounds: + + 1. Instance of `Bounds` class + 2. Lower and upper bounds on independent variables. Defaults to no + bounds. Each array must match the size of `x0` or be a scalar, + in the latter case a bound will be the same for all variables. + Use ``np.inf`` with an appropriate sign to disable bounds on all + or some variables. + method : {'trf', 'dogbox', 'lm'}, optional + Algorithm to perform minimization. + + * 'trf' : Trust Region Reflective algorithm, particularly suitable + for large sparse problems with bounds. Generally robust method. + * 'dogbox' : dogleg algorithm with rectangular trust regions, + typical use case is small problems with bounds. Not recommended + for problems with rank-deficient Jacobian. + * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK. + Doesn't handle bounds and sparse Jacobians. Usually the most + efficient method for small unconstrained problems. + + Default is 'trf'. See Notes for more information. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. Default + is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. + + If None and 'method' is not 'lm', the termination by this condition is + disabled. If 'method' is 'lm', this tolerance must be higher than + machine epsilon. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Default is 1e-8. The exact condition depends on the `method` used: + + * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``. + * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is + a trust-region radius and ``xs`` is the value of ``x`` + scaled according to `x_scale` parameter (see below). + + If None and 'method' is not 'lm', the termination by this condition is + disabled. If 'method' is 'lm', this tolerance must be higher than + machine epsilon. + gtol : float or None, optional + Tolerance for termination by the norm of the gradient. Default is 1e-8. + The exact condition depends on a `method` used: + + * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where + ``g_scaled`` is the value of the gradient scaled to account for + the presence of the bounds [STIR]_. + * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where + ``g_free`` is the gradient with respect to the variables which + are not in the optimal state on the boundary. + * For 'lm' : the maximum absolute value of the cosine of angles + between columns of the Jacobian and the residual vector is less + than `gtol`, or the residual vector is zero. + + If None and 'method' is not 'lm', the termination by this condition is + disabled. If 'method' is 'lm', this tolerance must be higher than + machine epsilon. + x_scale : array_like or 'jac', optional + Characteristic scale of each variable. 
Setting `x_scale` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting `x_scale` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to 'jac', the scale is iteratively updated using the + inverse norms of the columns of the Jacobian matrix (as described in + [JJMore]_). + loss : str or callable, optional + Determines the loss function. The following keyword values are allowed: + + * 'linear' (default) : ``rho(z) = z``. Gives a standard + least-squares problem. + * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth + approximation of l1 (absolute value) loss. Usually a good + choice for robust least squares. + * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works + similarly to 'soft_l1'. + * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers + influence, but may cause difficulties in optimization process. + * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on + a single residual, has properties similar to 'cauchy'. + + If callable, it must take a 1-D ndarray ``z=f**2`` and return an + array_like with shape (3, m) where row 0 contains function values, + row 1 contains first derivatives and row 2 contains second + derivatives. Method 'lm' supports only 'linear' loss. + f_scale : float, optional + Value of soft margin between inlier and outlier residuals, default + is 1.0. The loss function is evaluated as follows + ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`, + and ``rho`` is determined by `loss` parameter. This parameter has + no effect with ``loss='linear'``, but for other `loss` values it is + of crucial importance. + max_nfev : None or int, optional + Maximum number of function evaluations before the termination. + If None (default), the value is chosen automatically: + + * For 'trf' and 'dogbox' : 100 * n. + * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1) + otherwise (because 'lm' counts function calls in Jacobian + estimation). + + diff_step : None or array_like, optional + Determines the relative step size for the finite difference + approximation of the Jacobian. The actual step is computed as + ``x * diff_step``. If None (default), then `diff_step` is taken to be + a conventional "optimal" power of machine epsilon for the finite + difference scheme used [NR]_. + tr_solver : {None, 'exact', 'lsmr'}, optional + Method for solving trust-region subproblems, relevant only for 'trf' + and 'dogbox' methods. + + * 'exact' is suitable for not very large problems with dense + Jacobian matrices. The computational complexity per iteration is + comparable to a singular value decomposition of the Jacobian + matrix. + * 'lsmr' is suitable for problems with sparse and large Jacobian + matrices. It uses the iterative procedure + `scipy.sparse.linalg.lsmr` for finding a solution of a linear + least-squares problem and only requires matrix-vector product + evaluations. + + If None (default), the solver is chosen based on the type of Jacobian + returned on the first iteration. + tr_options : dict, optional + Keyword options passed to trust-region solver. + + * ``tr_solver='exact'``: `tr_options` are ignored. + * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`. 
+ Additionally, ``method='trf'`` supports 'regularize' option + (bool, default is True), which adds a regularization term to the + normal equation, which improves convergence if the Jacobian is + rank-deficient [Byrd]_ (eq. 3.4). + + jac_sparsity : {None, array_like, sparse matrix}, optional + Defines the sparsity structure of the Jacobian matrix for finite + difference estimation, its shape must be (m, n). If the Jacobian has + only few non-zero elements in *each* row, providing the sparsity + structure will greatly speed up the computations [Curtis]_. A zero + entry means that a corresponding element in the Jacobian is identically + zero. If provided, forces the use of 'lsmr' trust-region solver. + If None (default), then dense differencing will be used. Has no effect + for 'lm' method. + verbose : {0, 1, 2}, optional + Level of algorithm's verbosity: + + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations (not supported by 'lm' + method). + + args, kwargs : tuple and dict, optional + Additional arguments passed to `fun` and `jac`. Both empty by default. + The calling signature is ``fun(x, *args, **kwargs)`` and the same for + `jac`. + + Returns + ------- + result : OptimizeResult + `OptimizeResult` with the following fields defined: + + x : ndarray, shape (n,) + Solution found. + cost : float + Value of the cost function at the solution. + fun : ndarray, shape (m,) + Vector of residuals at the solution. + jac : ndarray, sparse matrix or LinearOperator, shape (m, n) + Modified Jacobian matrix at the solution, in the sense that J^T J + is a Gauss-Newton approximation of the Hessian of the cost function. + The type is the same as the one used by the algorithm. + grad : ndarray, shape (m,) + Gradient of the cost function at the solution. + optimality : float + First-order optimality measure. In unconstrained problems, it is + always the uniform norm of the gradient. In constrained problems, + it is the quantity which was compared with `gtol` during iterations. + active_mask : ndarray of int, shape (n,) + Each component shows whether a corresponding constraint is active + (that is, whether a variable is at the bound): + + * 0 : a constraint is not active. + * -1 : a lower bound is active. + * 1 : an upper bound is active. + + Might be somewhat arbitrary for 'trf' method as it generates a + sequence of strictly feasible iterates and `active_mask` is + determined within a tolerance threshold. + nfev : int + Number of function evaluations done. Methods 'trf' and 'dogbox' do + not count function calls for numerical Jacobian approximation, as + opposed to 'lm' method. + njev : int or None + Number of Jacobian evaluations done. If numerical Jacobian + approximation is used in 'lm' method, it is set to None. + status : int + The reason for algorithm termination: + + * -1 : improper input parameters status returned from MINPACK. + * 0 : the maximum number of function evaluations is exceeded. + * 1 : `gtol` termination condition is satisfied. + * 2 : `ftol` termination condition is satisfied. + * 3 : `xtol` termination condition is satisfied. + * 4 : Both `ftol` and `xtol` termination conditions are satisfied. + + message : str + Verbal description of the termination reason. + success : bool + True if one of the convergence criteria is satisfied (`status` > 0). + + See Also + -------- + leastsq : A legacy wrapper for the MINPACK implementation of the + Levenberg-Marquadt algorithm. 
+ curve_fit : Least-squares minimization applied to a curve-fitting problem. + + Notes + ----- + Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares + algorithms implemented in MINPACK (lmder, lmdif). It runs the + Levenberg-Marquardt algorithm formulated as a trust-region type algorithm. + The implementation is based on paper [JJMore]_, it is very robust and + efficient with a lot of smart tricks. It should be your first choice + for unconstrained problems. Note that it doesn't support bounds. Also, + it doesn't work when m < n. + + Method 'trf' (Trust Region Reflective) is motivated by the process of + solving a system of equations, which constitute the first-order optimality + condition for a bound-constrained minimization problem as formulated in + [STIR]_. The algorithm iteratively solves trust-region subproblems + augmented by a special diagonal quadratic term and with trust-region shape + determined by the distance from the bounds and the direction of the + gradient. This enhancements help to avoid making steps directly into bounds + and efficiently explore the whole space of variables. To further improve + convergence, the algorithm considers search directions reflected from the + bounds. To obey theoretical requirements, the algorithm keeps iterates + strictly feasible. With dense Jacobians trust-region subproblems are + solved by an exact method very similar to the one described in [JJMore]_ + (and implemented in MINPACK). The difference from the MINPACK + implementation is that a singular value decomposition of a Jacobian + matrix is done once per iteration, instead of a QR decomposition and series + of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace + approach of solving trust-region subproblems is used [STIR]_, [Byrd]_. + The subspace is spanned by a scaled gradient and an approximate + Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no + constraints are imposed the algorithm is very similar to MINPACK and has + generally comparable performance. The algorithm works quite robust in + unbounded and bounded problems, thus it is chosen as a default algorithm. + + Method 'dogbox' operates in a trust-region framework, but considers + rectangular trust regions as opposed to conventional ellipsoids [Voglis]_. + The intersection of a current trust region and initial bounds is again + rectangular, so on each iteration a quadratic minimization problem subject + to bound constraints is solved approximately by Powell's dogleg method + [NumOpt]_. The required Gauss-Newton step can be computed exactly for + dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large + sparse Jacobians. The algorithm is likely to exhibit slow convergence when + the rank of Jacobian is less than the number of variables. The algorithm + often outperforms 'trf' in bounded problems with a small number of + variables. + + Robust loss functions are implemented as described in [BA]_. The idea + is to modify a residual vector and a Jacobian matrix on each iteration + such that computed gradient and Gauss-Newton Hessian approximation match + the true gradient and Hessian approximation of the cost function. Then + the algorithm proceeds in a normal way, i.e., robust loss functions are + implemented as a simple wrapper over standard least-squares algorithms. + + .. versionadded:: 0.17.0 + + References + ---------- + .. [STIR] M. A. Branch, T. F. Coleman, and Y. 
Li, "A Subspace, Interior, + and Conjugate Gradient Method for Large-Scale Bound-Constrained + Minimization Problems," SIAM Journal on Scientific Computing, + Vol. 21, Number 1, pp 1-23, 1999. + .. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific + Computing. 3rd edition", Sec. 5.7. + .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate + solution of the trust region problem by minimization over + two-dimensional subspaces", Math. Programming, 40, pp. 247-263, + 1988. + .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of + sparse Jacobian matrices", Journal of the Institute of + Mathematics and its Applications, 13, pp. 117-120, 1974. + .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation + and Theory," Numerical Analysis, ed. G. A. Watson, Lecture + Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977. + .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region + Dogleg Approach for Unconstrained and Bound Constrained + Nonlinear Optimization", WSEAS International Conference on + Applied Mathematics, Corfu, Greece, 2004. + .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, + 2nd edition", Chapter 4. + .. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis", + Proceedings of the International Workshop on Vision Algorithms: + Theory and Practice, pp. 298-372, 1999. + + Examples + -------- + In this example we find a minimum of the Rosenbrock function without bounds + on independent variables. + + >>> import numpy as np + >>> def fun_rosenbrock(x): + ... return np.array([10 * (x[1] - x[0]**2), (1 - x[0])]) + + Notice that we only provide the vector of the residuals. The algorithm + constructs the cost function as a sum of squares of the residuals, which + gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``. + + >>> from scipy.optimize import least_squares + >>> x0_rosenbrock = np.array([2, 2]) + >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock) + >>> res_1.x + array([ 1., 1.]) + >>> res_1.cost + 9.8669242910846867e-30 + >>> res_1.optimality + 8.8928864934219529e-14 + + We now constrain the variables, in such a way that the previous solution + becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and + ``x[0]`` left unconstrained. To this end, we specify the `bounds` parameter + to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``. + + We also provide the analytic Jacobian: + + >>> def jac_rosenbrock(x): + ... return np.array([ + ... [-20 * x[0], 10], + ... [-1, 0]]) + + Putting this all together, we see that the new solution lies on the bound: + + >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock, + ... bounds=([-np.inf, 1.5], np.inf)) + >>> res_2.x + array([ 1.22437075, 1.5 ]) + >>> res_2.cost + 0.025213093946805685 + >>> res_2.optimality + 1.5885401433157753e-07 + + Now we solve a system of equations (i.e., the cost function should be zero + at a minimum) for a Broyden tridiagonal vector-valued function of 100000 + variables: + + >>> def fun_broyden(x): + ... f = (3 - x) * x + 1 + ... f[1:] -= x[:-1] + ... f[:-1] -= 2 * x[1:] + ... return f + + The corresponding Jacobian matrix is sparse. We tell the algorithm to + estimate it by finite differences and provide the sparsity structure of + Jacobian to significantly speed up this process. + + >>> from scipy.sparse import lil_matrix + >>> def sparsity_broyden(n): + ... sparsity = lil_matrix((n, n), dtype=int) + ... i = np.arange(n) + ... 
+ ... sparsity[i, i] = 1
+ ... i = np.arange(1, n)
+ ... sparsity[i, i - 1] = 1
+ ... i = np.arange(n - 1)
+ ... sparsity[i, i + 1] = 1
+ ... return sparsity
+ ...
+ >>> n = 100000
+ >>> x0_broyden = -np.ones(n)
+ ...
+ >>> res_3 = least_squares(fun_broyden, x0_broyden,
+ ... jac_sparsity=sparsity_broyden(n))
+ >>> res_3.cost
+ 4.5687069299604613e-23
+ >>> res_3.optimality
+ 1.1650454296851518e-11
+
+ Let's also solve a curve fitting problem using a robust loss function to
+ take care of outliers in the data. Define the model function as
+ ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
+ observation, and a, b, c are the parameters to estimate.
+
+ First, define the function which generates the data with noise and
+ outliers, define the model parameters, and generate data:
+
+ >>> from numpy.random import default_rng
+ >>> rng = default_rng()
+ >>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
+ ... rng = default_rng(seed)
+ ...
+ ... y = a + b * np.exp(t * c)
+ ...
+ ... error = noise * rng.standard_normal(t.size)
+ ... outliers = rng.integers(0, t.size, n_outliers)
+ ... error[outliers] *= 10
+ ...
+ ... return y + error
+ ...
+ >>> a = 0.5
+ >>> b = 2.0
+ >>> c = -1
+ >>> t_min = 0
+ >>> t_max = 10
+ >>> n_points = 15
+ ...
+ >>> t_train = np.linspace(t_min, t_max, n_points)
+ >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)
+
+ Define the function for computing residuals and the initial estimate of
+ the parameters.
+
+ >>> def fun(x, t, y):
+ ... return x[0] + x[1] * np.exp(x[2] * t) - y
+ ...
+ >>> x0 = np.array([1.0, 1.0, 0.0])
+
+ Compute a standard least-squares solution:
+
+ >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))
+
+ Now compute two solutions with two different robust loss functions. The
+ parameter `f_scale` is set to 0.1, meaning that inlier residuals should
+ not significantly exceed 0.1 (the noise level used).
+
+ >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
+ ... args=(t_train, y_train))
+ >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
+ ... args=(t_train, y_train))
+
+ And, finally, plot all the curves. We see that by selecting an appropriate
+ `loss` we can get estimates close to optimal even in the presence of
+ strong outliers. But keep in mind that generally it is recommended to try
+ 'soft_l1' or 'huber' losses first (if at all necessary) as the other two
+ options may cause difficulties in the optimization process.
+
+ >>> t_test = np.linspace(t_min, t_max, n_points * 10)
+ >>> y_true = gen_data(t_test, a, b, c)
+ >>> y_lsq = gen_data(t_test, *res_lsq.x)
+ >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
+ >>> y_log = gen_data(t_test, *res_log.x)
+ ...
+ >>> import matplotlib.pyplot as plt
+ >>> plt.plot(t_train, y_train, 'o')
+ >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
+ >>> plt.plot(t_test, y_lsq, label='linear loss')
+ >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
+ >>> plt.plot(t_test, y_log, label='cauchy loss')
+ >>> plt.xlabel("t")
+ >>> plt.ylabel("y")
+ >>> plt.legend()
+ >>> plt.show()
+
+ In the next example, we show how complex-valued residual functions of
+ complex variables can be optimized with ``least_squares()``. Consider the
+ following function:
+
+ >>> def f(z):
+ ... return z - (0.5 + 0.5j)
+
+ We wrap it into a function of real variables that returns real residuals
+ by simply handling the real and imaginary parts as independent variables:
+
+ >>> def f_wrap(x):
+ ... fx = f(x[0] + 1j*x[1])
+ ...
return np.array([fx.real, fx.imag]) + + Thus, instead of the original m-D complex function of n complex + variables we optimize a 2m-D real function of 2n real variables: + + >>> from scipy.optimize import least_squares + >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1])) + >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j + >>> z + (0.49999999999925893+0.49999999999925893j) + + """ + if method not in ['trf', 'dogbox', 'lm']: + raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.") + + if jac not in ['2-point', '3-point', 'cs'] and not callable(jac): + raise ValueError("`jac` must be '2-point', '3-point', 'cs' or " + "callable.") + + if tr_solver not in [None, 'exact', 'lsmr']: + raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.") + + if loss not in IMPLEMENTED_LOSSES and not callable(loss): + raise ValueError("`loss` must be one of {} or a callable." + .format(IMPLEMENTED_LOSSES.keys())) + + if method == 'lm' and loss != 'linear': + raise ValueError("method='lm' supports only 'linear' loss function.") + + if verbose not in [0, 1, 2]: + raise ValueError("`verbose` must be in [0, 1, 2].") + + if max_nfev is not None and max_nfev <= 0: + raise ValueError("`max_nfev` must be None or positive integer.") + + if np.iscomplexobj(x0): + raise ValueError("`x0` must be real.") + + x0 = np.atleast_1d(x0).astype(float) + + if x0.ndim > 1: + raise ValueError("`x0` must have at most 1 dimension.") + + if isinstance(bounds, Bounds): + lb, ub = bounds.lb, bounds.ub + bounds = (lb, ub) + else: + if len(bounds) == 2: + lb, ub = prepare_bounds(bounds, x0.shape[0]) + else: + raise ValueError("`bounds` must contain 2 elements.") + + if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)): + raise ValueError("Method 'lm' doesn't support bounds.") + + if lb.shape != x0.shape or ub.shape != x0.shape: + raise ValueError("Inconsistent shapes between bounds and `x0`.") + + if np.any(lb >= ub): + raise ValueError("Each lower bound must be strictly less than each " + "upper bound.") + + if not in_bounds(x0, lb, ub): + raise ValueError("`x0` is infeasible.") + + x_scale = check_x_scale(x_scale, x0) + + ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method) + + if method == 'trf': + x0 = make_strictly_feasible(x0, lb, ub) + + def fun_wrapped(x): + return np.atleast_1d(fun(x, *args, **kwargs)) + + f0 = fun_wrapped(x0) + + if f0.ndim != 1: + raise ValueError("`fun` must return at most 1-d array_like. 
" + f"f0.shape: {f0.shape}") + + if not np.all(np.isfinite(f0)): + raise ValueError("Residuals are not finite in the initial point.") + + n = x0.size + m = f0.size + + if method == 'lm' and m < n: + raise ValueError("Method 'lm' doesn't work when the number of " + "residuals is less than the number of variables.") + + loss_function = construct_loss_function(m, loss, f_scale) + if callable(loss): + rho = loss_function(f0) + if rho.shape != (3, m): + raise ValueError("The return value of `loss` callable has wrong " + "shape.") + initial_cost = 0.5 * np.sum(rho[0]) + elif loss_function is not None: + initial_cost = loss_function(f0, cost_only=True) + else: + initial_cost = 0.5 * np.dot(f0, f0) + + if callable(jac): + J0 = jac(x0, *args, **kwargs) + + if issparse(J0): + J0 = J0.tocsr() + + def jac_wrapped(x, _=None): + return jac(x, *args, **kwargs).tocsr() + + elif isinstance(J0, LinearOperator): + def jac_wrapped(x, _=None): + return jac(x, *args, **kwargs) + + else: + J0 = np.atleast_2d(J0) + + def jac_wrapped(x, _=None): + return np.atleast_2d(jac(x, *args, **kwargs)) + + else: # Estimate Jacobian by finite differences. + if method == 'lm': + if jac_sparsity is not None: + raise ValueError("method='lm' does not support " + "`jac_sparsity`.") + + if jac != '2-point': + warn(f"jac='{jac}' works equivalently to '2-point' for method='lm'.", + stacklevel=2) + + J0 = jac_wrapped = None + else: + if jac_sparsity is not None and tr_solver == 'exact': + raise ValueError("tr_solver='exact' is incompatible " + "with `jac_sparsity`.") + + jac_sparsity = check_jac_sparsity(jac_sparsity, m, n) + + def jac_wrapped(x, f): + J = approx_derivative(fun, x, rel_step=diff_step, method=jac, + f0=f, bounds=bounds, args=args, + kwargs=kwargs, sparsity=jac_sparsity) + if J.ndim != 2: # J is guaranteed not sparse. + J = np.atleast_2d(J) + + return J + + J0 = jac_wrapped(x0, f0) + + if J0 is not None: + if J0.shape != (m, n): + raise ValueError( + f"The return value of `jac` has wrong shape: expected {(m, n)}, " + f"actual {J0.shape}." 
+ ) + + if not isinstance(J0, np.ndarray): + if method == 'lm': + raise ValueError("method='lm' works only with dense " + "Jacobian matrices.") + + if tr_solver == 'exact': + raise ValueError( + "tr_solver='exact' works only with dense " + "Jacobian matrices.") + + jac_scale = isinstance(x_scale, str) and x_scale == 'jac' + if isinstance(J0, LinearOperator) and jac_scale: + raise ValueError("x_scale='jac' can't be used when `jac` " + "returns LinearOperator.") + + if tr_solver is None: + if isinstance(J0, np.ndarray): + tr_solver = 'exact' + else: + tr_solver = 'lsmr' + + if method == 'lm': + result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol, + max_nfev, x_scale, diff_step) + + elif method == 'trf': + result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol, + gtol, max_nfev, x_scale, loss_function, tr_solver, + tr_options.copy(), verbose) + + elif method == 'dogbox': + if tr_solver == 'lsmr' and 'regularize' in tr_options: + warn("The keyword 'regularize' in `tr_options` is not relevant " + "for 'dogbox' method.", + stacklevel=2) + tr_options = tr_options.copy() + del tr_options['regularize'] + + result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, + xtol, gtol, max_nfev, x_scale, loss_function, + tr_solver, tr_options, verbose) + + result.message = TERMINATION_MESSAGES[result.status] + result.success = result.status > 0 + + if verbose >= 1: + print(result.message) + print("Function evaluations {}, initial cost {:.4e}, final cost " + "{:.4e}, first-order optimality {:.2e}." + .format(result.nfev, initial_cost, result.cost, + result.optimality)) + + return result diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/lsq_linear.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/lsq_linear.py new file mode 100644 index 0000000000000000000000000000000000000000..fdf4d26020109d55a6aea2be3009181a388c722d --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/lsq_linear.py @@ -0,0 +1,362 @@ +"""Linear least squares with bound constraints on independent variables.""" +import numpy as np +from numpy.linalg import norm +from scipy.sparse import issparse, csr_matrix +from scipy.sparse.linalg import LinearOperator, lsmr +from scipy.optimize import OptimizeResult +from scipy.optimize._minimize import Bounds + +from .common import in_bounds, compute_grad +from .trf_linear import trf_linear +from .bvls import bvls + + +def prepare_bounds(bounds, n): + if len(bounds) != 2: + raise ValueError("`bounds` must contain 2 elements.") + lb, ub = (np.asarray(b, dtype=float) for b in bounds) + + if lb.ndim == 0: + lb = np.resize(lb, n) + + if ub.ndim == 0: + ub = np.resize(ub, n) + + return lb, ub + + +TERMINATION_MESSAGES = { + -1: "The algorithm was not able to make progress on the last iteration.", + 0: "The maximum number of iterations is exceeded.", + 1: "The first-order optimality measure is less than `tol`.", + 2: "The relative change of the cost function is less than `tol`.", + 3: "The unconstrained solution is optimal." +} + + +def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10, + lsq_solver=None, lsmr_tol=None, max_iter=None, + verbose=0, *, lsmr_maxiter=None,): + r"""Solve a linear least-squares problem with bounds on the variables. 
+
+ Given an m-by-n design matrix A and a target vector b with m elements,
+ `lsq_linear` solves the following optimization problem::
+
+ minimize 0.5 * ||A x - b||**2
+ subject to lb <= x <= ub
+
+ This optimization problem is convex, hence any minimum found (once the
+ iterations have converged) is guaranteed to be global.
+
+ Parameters
+ ----------
+ A : array_like, sparse matrix or LinearOperator, shape (m, n)
+ Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
+ b : array_like, shape (m,)
+ Target vector.
+ bounds : 2-tuple of array_like or `Bounds`, optional
+ Lower and upper bounds on parameters. Defaults to no bounds.
+ There are two ways to specify the bounds:
+
+ - Instance of `Bounds` class.
+
+ - 2-tuple of array_like: Each element of the tuple must be either
+ an array with the length equal to the number of parameters, or a
+ scalar (in which case the bound is taken to be the same for all
+ parameters). Use ``np.inf`` with an appropriate sign to disable
+ bounds on all or some parameters.
+
+ method : 'trf' or 'bvls', optional
+ Method to perform minimization.
+
+ * 'trf' : Trust Region Reflective algorithm adapted for a linear
+ least-squares problem. This is an interior-point-like method
+ and the required number of iterations is weakly correlated with
+ the number of variables.
+ * 'bvls' : Bounded-variable least-squares algorithm. This is
+ an active set method, which requires a number of iterations
+ comparable to the number of variables. Can't be used when `A` is
+ sparse or a LinearOperator.
+
+ Default is 'trf'.
+ tol : float, optional
+ Tolerance parameter. The algorithm terminates if the relative change
+ of the cost function is less than `tol` on the last iteration.
+ Additionally, the first-order optimality measure is considered:
+
+ * ``method='trf'`` terminates if the uniform norm of the gradient,
+ scaled to account for the presence of the bounds, is less than
+ `tol`.
+ * ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
+ are satisfied within `tol` tolerance.
+
+ lsq_solver : {None, 'exact', 'lsmr'}, optional
+ Method of solving unbounded least-squares problems throughout
+ iterations:
+
+ * 'exact' : Use dense QR or SVD decomposition approach. Can't be
+ used when `A` is sparse or a LinearOperator.
+ * 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
+ which requires only matrix-vector product evaluations. Can't
+ be used with ``method='bvls'``.
+
+ If None (default), the solver is chosen based on the type of `A`.
+ lsmr_tol : None, float or 'auto', optional
+ Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`.
+ If None (default), it is set to ``1e-2 * tol``. If 'auto', the
+ tolerance will be adjusted based on the optimality of the current
+ iterate, which can speed up the optimization process, but is not always
+ reliable.
+ max_iter : None or int, optional
+ Maximum number of iterations before termination. If None (default), it
+ is set to 100 for ``method='trf'`` or to the number of variables for
+ ``method='bvls'`` (not counting iterations for 'bvls' initialization).
+ verbose : {0, 1, 2}, optional
+ Level of algorithm's verbosity:
+
+ * 0 : work silently (default).
+ * 1 : display a termination report.
+ * 2 : display progress during iterations.
+ lsmr_maxiter : None or int, optional
+ Maximum number of iterations for the lsmr least squares solver,
+ if it is used (by setting ``lsq_solver='lsmr'``).
If None (default), it + uses lsmr's default of ``min(m, n)`` where ``m`` and ``n`` are the + number of rows and columns of `A`, respectively. Has no effect if + ``lsq_solver='exact'``. + + Returns + ------- + OptimizeResult with the following fields defined: + x : ndarray, shape (n,) + Solution found. + cost : float + Value of the cost function at the solution. + fun : ndarray, shape (m,) + Vector of residuals at the solution. + optimality : float + First-order optimality measure. The exact meaning depends on `method`, + refer to the description of `tol` parameter. + active_mask : ndarray of int, shape (n,) + Each component shows whether a corresponding constraint is active + (that is, whether a variable is at the bound): + + * 0 : a constraint is not active. + * -1 : a lower bound is active. + * 1 : an upper bound is active. + + Might be somewhat arbitrary for the `trf` method as it generates a + sequence of strictly feasible iterates and active_mask is determined + within a tolerance threshold. + unbounded_sol : tuple + Unbounded least squares solution tuple returned by the least squares + solver (set with `lsq_solver` option). If `lsq_solver` is not set or is + set to ``'exact'``, the tuple contains an ndarray of shape (n,) with + the unbounded solution, an ndarray with the sum of squared residuals, + an int with the rank of `A`, and an ndarray with the singular values + of `A` (see NumPy's ``linalg.lstsq`` for more information). If + `lsq_solver` is set to ``'lsmr'``, the tuple contains an ndarray of + shape (n,) with the unbounded solution, an int with the exit code, + an int with the number of iterations, and five floats with + various norms and the condition number of `A` (see SciPy's + ``sparse.linalg.lsmr`` for more information). This output can be + useful for determining the convergence of the least squares solver, + particularly the iterative ``'lsmr'`` solver. The unbounded least + squares problem is to minimize ``0.5 * ||A x - b||**2``. + nit : int + Number of iterations. Zero if the unconstrained solution is optimal. + status : int + Reason for algorithm termination: + + * -1 : the algorithm was not able to make progress on the last + iteration. + * 0 : the maximum number of iterations is exceeded. + * 1 : the first-order optimality measure is less than `tol`. + * 2 : the relative change of the cost function is less than `tol`. + * 3 : the unconstrained solution is optimal. + + message : str + Verbal description of the termination reason. + success : bool + True if one of the convergence criteria is satisfied (`status` > 0). + + See Also + -------- + nnls : Linear least squares with non-negativity constraint. + least_squares : Nonlinear least squares with bounds on the variables. + + Notes + ----- + The algorithm first computes the unconstrained least-squares solution by + `numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on + `lsq_solver`. This solution is returned as optimal if it lies within the + bounds. + + Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for + a linear least-squares problem. The iterations are essentially the same as + in the nonlinear least-squares algorithm, but as the quadratic function + model is always accurate, we don't need to track or modify the radius of + a trust region. The line search (backtracking) is used as a safety net + when a selected step does not decrease the cost function. Read more + detailed description of the algorithm in `scipy.optimize.least_squares`. 
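+
+ As a quick illustration (a minimal sketch; the specific matrix, vector,
+ and the comparison tolerance below are arbitrary choices, not part of
+ the algorithm), the 'trf' method and the 'bvls' method described next
+ can be compared directly on a small dense problem, where they should
+ agree because the problem is convex:
+
+ >>> import numpy as np
+ >>> from scipy.optimize import lsq_linear
+ >>> A_small = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
+ >>> b_small = np.array([1.0, -1.0, 1.0])
+ >>> res_trf = lsq_linear(A_small, b_small, bounds=(0, 1), method='trf')
+ >>> res_bvls = lsq_linear(A_small, b_small, bounds=(0, 1), method='bvls')
+ >>> np.allclose(res_trf.x, res_bvls.x, atol=1e-6)
+ True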
+
+ Method 'bvls' runs a Python implementation of the algorithm described in
+ [BVLS]_. The algorithm maintains active and free sets of variables, on
+ each iteration chooses a new variable to move from the active set to the
+ free set, and then solves the unconstrained least-squares problem on the
+ free variables. This algorithm is guaranteed to give an accurate solution
+ eventually, but may require up to n iterations for a problem with n
+ variables. Additionally, an ad hoc initialization procedure is
+ implemented that determines which variables to set free or active
+ initially. It takes a number of iterations before the actual BVLS starts,
+ but can significantly reduce the number of further iterations.
+
+ References
+ ----------
+ .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
+ and Conjugate Gradient Method for Large-Scale Bound-Constrained
+ Minimization Problems," SIAM Journal on Scientific Computing,
+ Vol. 21, Number 1, pp 1-23, 1999.
+ .. [BVLS] P. B. Stark and R. L. Parker, "Bounded-Variable Least-Squares:
+ an Algorithm and Applications", Computational Statistics, 10,
+ 129-141, 1995.
+
+ Examples
+ --------
+ In this example, a problem with a large sparse matrix and bounds on the
+ variables is solved.
+
+ >>> import numpy as np
+ >>> from scipy.sparse import rand
+ >>> from scipy.optimize import lsq_linear
+ >>> rng = np.random.default_rng()
+ ...
+ >>> m = 20000
+ >>> n = 10000
+ ...
+ >>> A = rand(m, n, density=1e-4, random_state=rng)
+ >>> b = rng.standard_normal(m)
+ ...
+ >>> lb = rng.standard_normal(n)
+ >>> ub = lb + 1
+ ...
+ >>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
+ # may vary
+ The relative change of the cost function is less than `tol`.
+ Number of iterations 16, initial cost 1.5039e+04, final cost 1.1112e+04,
+ first-order optimality 4.66e-08.
+ """
+ if method not in ['trf', 'bvls']:
+ raise ValueError("`method` must be 'trf' or 'bvls'")
+
+ if lsq_solver not in [None, 'exact', 'lsmr']:
+ raise ValueError("`lsq_solver` must be None, 'exact' or 'lsmr'.")
+
+ if verbose not in [0, 1, 2]:
+ raise ValueError("`verbose` must be in [0, 1, 2].")
+
+ if issparse(A):
+ A = csr_matrix(A)
+ elif not isinstance(A, LinearOperator):
+ A = np.atleast_2d(np.asarray(A))
+
+ if method == 'bvls':
+ if lsq_solver == 'lsmr':
+ raise ValueError("method='bvls' can't be used with "
+ "lsq_solver='lsmr'")
+
+ if not isinstance(A, np.ndarray):
+ raise ValueError("method='bvls' can't be used with `A` being "
+ "sparse or LinearOperator.")
+
+ if lsq_solver is None:
+ if isinstance(A, np.ndarray):
+ lsq_solver = 'exact'
+ else:
+ lsq_solver = 'lsmr'
+ elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
+ raise ValueError("`exact` solver can't be used when `A` is "
+ "sparse or LinearOperator.")
+
+ if len(A.shape) != 2: # No ndim for LinearOperator.
+ raise ValueError("`A` must have at most 2 dimensions.") + + if max_iter is not None and max_iter <= 0: + raise ValueError("`max_iter` must be None or positive integer.") + + m, n = A.shape + + b = np.atleast_1d(b) + if b.ndim != 1: + raise ValueError("`b` must have at most 1 dimension.") + + if b.size != m: + raise ValueError("Inconsistent shapes between `A` and `b`.") + + if isinstance(bounds, Bounds): + lb = bounds.lb + ub = bounds.ub + else: + lb, ub = prepare_bounds(bounds, n) + + if lb.shape != (n,) and ub.shape != (n,): + raise ValueError("Bounds have wrong shape.") + + if np.any(lb >= ub): + raise ValueError("Each lower bound must be strictly less than each " + "upper bound.") + + if lsmr_maxiter is not None and lsmr_maxiter < 1: + raise ValueError("`lsmr_maxiter` must be None or positive integer.") + + if not ((isinstance(lsmr_tol, float) and lsmr_tol > 0) or + lsmr_tol in ('auto', None)): + raise ValueError("`lsmr_tol` must be None, 'auto', or positive float.") + + if lsq_solver == 'exact': + unbd_lsq = np.linalg.lstsq(A, b, rcond=-1) + elif lsq_solver == 'lsmr': + first_lsmr_tol = lsmr_tol # tol of first call to lsmr + if lsmr_tol is None or lsmr_tol == 'auto': + first_lsmr_tol = 1e-2 * tol # default if lsmr_tol not defined + unbd_lsq = lsmr(A, b, maxiter=lsmr_maxiter, + atol=first_lsmr_tol, btol=first_lsmr_tol) + x_lsq = unbd_lsq[0] # extract the solution from the least squares solver + + if in_bounds(x_lsq, lb, ub): + r = A @ x_lsq - b + cost = 0.5 * np.dot(r, r) + termination_status = 3 + termination_message = TERMINATION_MESSAGES[termination_status] + g = compute_grad(A, r) + g_norm = norm(g, ord=np.inf) + + if verbose > 0: + print(termination_message) + print(f"Final cost {cost:.4e}, first-order optimality {g_norm:.2e}") + + return OptimizeResult( + x=x_lsq, fun=r, cost=cost, optimality=g_norm, + active_mask=np.zeros(n), unbounded_sol=unbd_lsq, + nit=0, status=termination_status, + message=termination_message, success=True) + + if method == 'trf': + res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol, + max_iter, verbose, lsmr_maxiter=lsmr_maxiter) + elif method == 'bvls': + res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose) + + res.unbounded_sol = unbd_lsq + res.message = TERMINATION_MESSAGES[res.status] + res.success = res.status > 0 + + if verbose > 0: + print(res.message) + print( + f"Number of iterations {res.nit}, initial cost {res.initial_cost:.4e}, " + f"final cost {res.cost:.4e}, first-order optimality {res.optimality:.2e}." + ) + + del res.initial_cost + + return res diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf.py new file mode 100644 index 0000000000000000000000000000000000000000..9154bdba5b2cc41883811ba1820dfc251e515d6c --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf.py @@ -0,0 +1,560 @@ +"""Trust Region Reflective algorithm for least-squares optimization. + +The algorithm is based on ideas from paper [STIR]_. The main idea is to +account for the presence of the bounds by appropriate scaling of the variables (or, +equivalently, changing a trust-region shape). Let's introduce a vector v: + + | ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf + v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf + | 1, otherwise + +where g is the gradient of a cost function and lb, ub are the bounds. 
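+
+A direct NumPy transcription of this definition (an illustrative sketch
+only; the library's own version lives in ``common.CL_scaling_vector`` and
+also returns the derivative information used for the diagonal term below):
+
+    import numpy as np
+
+    def cl_scaling_vector(x, g, lb, ub):
+        # Distance to the bound at which the anti-gradient points, else 1.
+        v = np.ones_like(x)
+        mask = (g < 0) & np.isfinite(ub)
+        v[mask] = ub[mask] - x[mask]
+        mask = (g > 0) & np.isfinite(lb)
+        v[mask] = x[mask] - lb[mask]
+        return v
+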
The
+components of v are the distances to the bounds at which the anti-gradient
+points (if this distance is finite). Define a scaling matrix D = diag(v**0.5).
+First-order optimality conditions can be stated as
+
+    D^2 g(x) = 0.
+
+This means that components of the gradient should be zero for strictly
+interior variables, and that components must point inside the feasible region
+for variables on the bound.
+
+Now consider this system of equations as a new optimization problem. If the
+point x is strictly interior (not on the bound), then the left-hand side is
+differentiable and the Newton step for it satisfies
+
+    (D^2 H + diag(g) Jv) p = -D^2 g
+
+where H is the Hessian matrix (or its J^T J approximation in least squares),
+Jv is the Jacobian matrix of v with components -1, 1 or 0, such that all
+elements of matrix C = diag(g) Jv are non-negative. Introduce the change
+of the variables x = D x_h (_h would be "hat" in LaTeX). In the new variables,
+we have a Newton step satisfying
+
+    B_h p_h = -g_h,
+
+where B_h = D H D + C, g_h = D g. In least squares B_h = J_h^T J_h, where
+J_h = J D. Note that J_h and g_h are proper Jacobian and gradient with respect
+to "hat" variables. To guarantee global convergence, we formulate a
+trust-region problem based on the Newton step in the new variables:
+
+    0.5 * p_h^T B_h p_h + g_h^T p_h -> min, ||p_h|| <= Delta
+
+In the original space B = H + D^{-1} C D^{-1}, and the equivalent trust-region
+problem is
+
+    0.5 * p^T B p + g^T p -> min, ||D^{-1} p|| <= Delta
+
+Here, the meaning of the matrix D becomes clearer: it alters the shape
+of a trust-region, such that large steps towards the bounds are not allowed.
+In the implementation, the trust-region problem is solved in "hat" space,
+but handling of the bounds is done in the original space (see below and read
+the code).
+
+The introduction of the matrix D doesn't allow the bounds to be ignored: the
+algorithm must keep its iterates strictly feasible (to satisfy the
+aforementioned differentiability), and the parameter theta controls the step
+back from the boundary (see the code for details).
+
+The algorithm does another important trick. If the trust-region solution
+doesn't fit into the bounds, then a search direction reflected from the first
+encountered bound is considered. For motivation and analysis refer to the
+[STIR]_ paper (and other papers of the authors). In practice it doesn't need
+much justification: the algorithm simply chooses the best step among
+three: a constrained trust-region step, a reflected step and a constrained
+Cauchy step (a minimizer along -g_h in "hat" space, or -D^2 g in the original
+space).
+
+Another feature is that the trust-region radius control strategy is modified
+to account for the appearance of the diagonal C matrix (called diag_h in the
+code).
+
+Note that all of these peculiarities vanish when we consider
+problems without bounds (the algorithm becomes a standard trust-region type
+algorithm very similar to ones implemented in MINPACK).
+
+The implementation supports two methods of solving the trust-region problem.
+The first, called 'exact', applies an SVD to the Jacobian and then solves the
+problem very accurately using the algorithm described in [JJMore]_. It is not
+applicable to large problems. The second, called 'lsmr', uses the 2-D subspace
+approach (sometimes called "indefinite dogleg"), where the problem is solved
+in a subspace spanned by the gradient and the approximate Gauss-Newton step
+found by ``scipy.sparse.linalg.lsmr``.
The 2-D trust-region problem is
+reformulated as a 4th order algebraic equation and solved very accurately by
+``numpy.roots``. The subspace approach allows solving very large problems
+(up to a couple of million residuals on a regular PC), provided the Jacobian
+matrix is sufficiently sparse.
+
+References
+----------
+.. [STIR] Branch, M.A., T.F. Coleman, and Y. Li, "A Subspace, Interior,
+ and Conjugate Gradient Method for Large-Scale Bound-Constrained
+ Minimization Problems," SIAM Journal on Scientific Computing,
+ Vol. 21, Number 1, pp 1-23, 1999.
+.. [JJMore] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
+ and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
+ Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
+"""
+import numpy as np
+from numpy.linalg import norm
+from scipy.linalg import svd, qr
+from scipy.sparse.linalg import lsmr
+from scipy.optimize import OptimizeResult
+
+from .common import (
+ step_size_to_bound, find_active_constraints, in_bounds,
+ make_strictly_feasible, intersect_trust_region, solve_lsq_trust_region,
+ solve_trust_region_2d, minimize_quadratic_1d, build_quadratic_1d,
+ evaluate_quadratic, right_multiplied_operator, regularized_lsq_operator,
+ CL_scaling_vector, compute_grad, compute_jac_scale, check_termination,
+ update_tr_radius, scale_for_robust_loss_function, print_header_nonlinear,
+ print_iteration_nonlinear)
+
+
+def trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
+ loss_function, tr_solver, tr_options, verbose):
+ # For efficiency, it makes sense to run the simplified version of the
+ # algorithm when no bounds are imposed, so we decided to write two
+ # separate functions. This violates the DRY principle, but keeps each
+ # function as readable as possible.
+ if np.all(lb == -np.inf) and np.all(ub == np.inf):
+ return trf_no_bounds(
+ fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
+ loss_function, tr_solver, tr_options, verbose)
+ else:
+ return trf_bounds(
+ fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
+ loss_function, tr_solver, tr_options, verbose)
+
+
+def select_step(x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta):
+ """Select the best step according to Trust Region Reflective algorithm."""
+ if in_bounds(x + p, lb, ub):
+ p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
+ return p, p_h, -p_value
+
+ p_stride, hits = step_size_to_bound(x, p, lb, ub)
+
+ # Compute the reflected direction.
+ r_h = np.copy(p_h)
+ r_h[hits.astype(bool)] *= -1
+ r = d * r_h
+
+ # Restrict the trust-region step so that it hits the bound.
+ p *= p_stride
+ p_h *= p_stride
+ x_on_bound = x + p
+
+ # The reflected direction will first cross either the feasible region
+ # boundary or the trust-region boundary.
+ _, to_tr = intersect_trust_region(p_h, r_h, Delta)
+ to_bound, _ = step_size_to_bound(x_on_bound, r, lb, ub)
+
+ # Find lower and upper bounds on a step size along the reflected
+ # direction, considering the strict feasibility requirement. There is no
+ # single correct way to do that; the chosen approach seems to work best
+ # on test problems.
+ r_stride = min(to_bound, to_tr)
+ if r_stride > 0:
+ r_stride_l = (1 - theta) * p_stride / r_stride
+ if r_stride == to_bound:
+ r_stride_u = theta * to_bound
+ else:
+ r_stride_u = to_tr
+ else:
+ r_stride_l = 0
+ r_stride_u = -1
+
+ # Check if reflection step is available.
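+ # (The reflected step exists only when the feasible stride interval
+ # [r_stride_l, r_stride_u] is non-empty; otherwise r_value is set to
+ # infinity below, so the reflected step can never be selected.)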
+ if r_stride_l <= r_stride_u:
+ a, b, c = build_quadratic_1d(J_h, g_h, r_h, s0=p_h, diag=diag_h)
+ r_stride, r_value = minimize_quadratic_1d(
+ a, b, r_stride_l, r_stride_u, c=c)
+ r_h *= r_stride
+ r_h += p_h
+ r = r_h * d
+ else:
+ r_value = np.inf
+
+ # Now correct p_h to make it strictly interior.
+ p *= theta
+ p_h *= theta
+ p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
+
+ ag_h = -g_h
+ ag = d * ag_h
+
+ to_tr = Delta / norm(ag_h)
+ to_bound, _ = step_size_to_bound(x, ag, lb, ub)
+ if to_bound < to_tr:
+ ag_stride = theta * to_bound
+ else:
+ ag_stride = to_tr
+
+ a, b = build_quadratic_1d(J_h, g_h, ag_h, diag=diag_h)
+ ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride)
+ ag_h *= ag_stride
+ ag *= ag_stride
+
+ if p_value < r_value and p_value < ag_value:
+ return p, p_h, -p_value
+ elif r_value < p_value and r_value < ag_value:
+ return r, r_h, -r_value
+ else:
+ return ag, ag_h, -ag_value
+
+
+def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
+ x_scale, loss_function, tr_solver, tr_options, verbose):
+ x = x0.copy()
+
+ f = f0
+ f_true = f.copy()
+ nfev = 1
+
+ J = J0
+ njev = 1
+ m, n = J.shape
+
+ if loss_function is not None:
+ rho = loss_function(f)
+ cost = 0.5 * np.sum(rho[0])
+ J, f = scale_for_robust_loss_function(J, f, rho)
+ else:
+ cost = 0.5 * np.dot(f, f)
+
+ g = compute_grad(J, f)
+
+ jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
+ if jac_scale:
+ scale, scale_inv = compute_jac_scale(J)
+ else:
+ scale, scale_inv = x_scale, 1 / x_scale
+
+ v, dv = CL_scaling_vector(x, g, lb, ub)
+ v[dv != 0] *= scale_inv[dv != 0]
+ Delta = norm(x0 * scale_inv / v**0.5)
+ if Delta == 0:
+ Delta = 1.0
+
+ g_norm = norm(g * v, ord=np.inf)
+
+ f_augmented = np.zeros(m + n)
+ if tr_solver == 'exact':
+ J_augmented = np.empty((m + n, n))
+ elif tr_solver == 'lsmr':
+ reg_term = 0.0
+ regularize = tr_options.pop('regularize', True)
+
+ if max_nfev is None:
+ max_nfev = x0.size * 100
+
+ alpha = 0.0 # "Levenberg-Marquardt" parameter
+
+ termination_status = None
+ iteration = 0
+ step_norm = None
+ actual_reduction = None
+
+ if verbose == 2:
+ print_header_nonlinear()
+
+ while True:
+ v, dv = CL_scaling_vector(x, g, lb, ub)
+
+ g_norm = norm(g * v, ord=np.inf)
+ if g_norm < gtol:
+ termination_status = 1
+
+ if verbose == 2:
+ print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
+ step_norm, g_norm)
+
+ if termination_status is not None or nfev == max_nfev:
+ break
+
+ # Now compute variables in "hat" space. Here, we also account for
+ # scaling introduced by the `x_scale` parameter. This part is a bit
+ # tricky: you have to write down the formulas and see how the
+ # trust-region problem is formulated when the two types of scaling
+ # are applied. The idea is that first we apply `x_scale` and then
+ # apply the Coleman-Li approach in the new variables.
+
+ # v is recomputed in the variables after applying `x_scale`; note that
+ # components which were identically 1 are not affected.
+ v[dv != 0] *= scale_inv[dv != 0]
+
+ # Here, we apply two types of scaling.
+ d = v**0.5 * scale
+
+ # C = diag(g * scale) Jv
+ diag_h = g * dv * scale
+
+ # After all this has been done, we continue normally.
+
+ # "hat" gradient.
+ g_h = d * g
+
+ f_augmented[:m] = f
+ if tr_solver == 'exact':
+ J_augmented[:m] = J * d
+ J_h = J_augmented[:m] # Memory view.
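+ # The extra n rows hold diag(diag_h)**0.5, so the SVD of J_augmented
+ # below solves the least-squares problem augmented with the quadratic
+ # term p^T diag(diag_h) p coming from the Coleman-Li scaling.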
+ J_augmented[m:] = np.diag(diag_h**0.5) + U, s, V = svd(J_augmented, full_matrices=False) + V = V.T + uf = U.T.dot(f_augmented) + elif tr_solver == 'lsmr': + J_h = right_multiplied_operator(J, d) + + if regularize: + a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h) + to_tr = Delta / norm(g_h) + ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1] + reg_term = -ag_value / Delta**2 + + lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5) + gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0] + S = np.vstack((g_h, gn_h)).T + S, _ = qr(S, mode='economic') + JS = J_h.dot(S) # LinearOperator does dot too. + B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S) + g_S = S.T.dot(g_h) + + # theta controls step back step ratio from the bounds. + theta = max(0.995, 1 - g_norm) + + actual_reduction = -1 + while actual_reduction <= 0 and nfev < max_nfev: + if tr_solver == 'exact': + p_h, alpha, n_iter = solve_lsq_trust_region( + n, m, uf, s, V, Delta, initial_alpha=alpha) + elif tr_solver == 'lsmr': + p_S, _ = solve_trust_region_2d(B_S, g_S, Delta) + p_h = S.dot(p_S) + + p = d * p_h # Trust-region solution in the original space. + step, step_h, predicted_reduction = select_step( + x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta) + + x_new = make_strictly_feasible(x + step, lb, ub, rstep=0) + f_new = fun(x_new) + nfev += 1 + + step_h_norm = norm(step_h) + + if not np.all(np.isfinite(f_new)): + Delta = 0.25 * step_h_norm + continue + + # Usual trust-region step quality estimation. + if loss_function is not None: + cost_new = loss_function(f_new, cost_only=True) + else: + cost_new = 0.5 * np.dot(f_new, f_new) + actual_reduction = cost - cost_new + Delta_new, ratio = update_tr_radius( + Delta, actual_reduction, predicted_reduction, + step_h_norm, step_h_norm > 0.95 * Delta) + + step_norm = norm(step) + termination_status = check_termination( + actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol) + if termination_status is not None: + break + + alpha *= Delta / Delta_new + Delta = Delta_new + + if actual_reduction > 0: + x = x_new + + f = f_new + f_true = f.copy() + + cost = cost_new + + J = jac(x, f) + njev += 1 + + if loss_function is not None: + rho = loss_function(f) + J, f = scale_for_robust_loss_function(J, f, rho) + + g = compute_grad(J, f) + + if jac_scale: + scale, scale_inv = compute_jac_scale(J, scale_inv) + else: + step_norm = 0 + actual_reduction = 0 + + iteration += 1 + + if termination_status is None: + termination_status = 0 + + active_mask = find_active_constraints(x, lb, ub, rtol=xtol) + return OptimizeResult( + x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm, + active_mask=active_mask, nfev=nfev, njev=njev, + status=termination_status) + + +def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, + x_scale, loss_function, tr_solver, tr_options, verbose): + x = x0.copy() + + f = f0 + f_true = f.copy() + nfev = 1 + + J = J0 + njev = 1 + m, n = J.shape + + if loss_function is not None: + rho = loss_function(f) + cost = 0.5 * np.sum(rho[0]) + J, f = scale_for_robust_loss_function(J, f, rho) + else: + cost = 0.5 * np.dot(f, f) + + g = compute_grad(J, f) + + jac_scale = isinstance(x_scale, str) and x_scale == 'jac' + if jac_scale: + scale, scale_inv = compute_jac_scale(J) + else: + scale, scale_inv = x_scale, 1 / x_scale + + Delta = norm(x0 * scale_inv) + if Delta == 0: + Delta = 1.0 + + if tr_solver == 'lsmr': + reg_term = 0 + damp = tr_options.pop('damp', 0.0) + regularize = tr_options.pop('regularize', True) + + if max_nfev is None: + 
max_nfev = x0.size * 100 + + alpha = 0.0 # "Levenberg-Marquardt" parameter + + termination_status = None + iteration = 0 + step_norm = None + actual_reduction = None + + if verbose == 2: + print_header_nonlinear() + + while True: + g_norm = norm(g, ord=np.inf) + if g_norm < gtol: + termination_status = 1 + + if verbose == 2: + print_iteration_nonlinear(iteration, nfev, cost, actual_reduction, + step_norm, g_norm) + + if termination_status is not None or nfev == max_nfev: + break + + d = scale + g_h = d * g + + if tr_solver == 'exact': + J_h = J * d + U, s, V = svd(J_h, full_matrices=False) + V = V.T + uf = U.T.dot(f) + elif tr_solver == 'lsmr': + J_h = right_multiplied_operator(J, d) + + if regularize: + a, b = build_quadratic_1d(J_h, g_h, -g_h) + to_tr = Delta / norm(g_h) + ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1] + reg_term = -ag_value / Delta**2 + + damp_full = (damp**2 + reg_term)**0.5 + gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0] + S = np.vstack((g_h, gn_h)).T + S, _ = qr(S, mode='economic') + JS = J_h.dot(S) + B_S = np.dot(JS.T, JS) + g_S = S.T.dot(g_h) + + actual_reduction = -1 + while actual_reduction <= 0 and nfev < max_nfev: + if tr_solver == 'exact': + step_h, alpha, n_iter = solve_lsq_trust_region( + n, m, uf, s, V, Delta, initial_alpha=alpha) + elif tr_solver == 'lsmr': + p_S, _ = solve_trust_region_2d(B_S, g_S, Delta) + step_h = S.dot(p_S) + + predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h) + step = d * step_h + x_new = x + step + f_new = fun(x_new) + nfev += 1 + + step_h_norm = norm(step_h) + + if not np.all(np.isfinite(f_new)): + Delta = 0.25 * step_h_norm + continue + + # Usual trust-region step quality estimation. + if loss_function is not None: + cost_new = loss_function(f_new, cost_only=True) + else: + cost_new = 0.5 * np.dot(f_new, f_new) + actual_reduction = cost - cost_new + + Delta_new, ratio = update_tr_radius( + Delta, actual_reduction, predicted_reduction, + step_h_norm, step_h_norm > 0.95 * Delta) + + step_norm = norm(step) + termination_status = check_termination( + actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol) + if termination_status is not None: + break + + alpha *= Delta / Delta_new + Delta = Delta_new + + if actual_reduction > 0: + x = x_new + + f = f_new + f_true = f.copy() + + cost = cost_new + + J = jac(x, f) + njev += 1 + + if loss_function is not None: + rho = loss_function(f) + J, f = scale_for_robust_loss_function(J, f, rho) + + g = compute_grad(J, f) + + if jac_scale: + scale, scale_inv = compute_jac_scale(J, scale_inv) + else: + step_norm = 0 + actual_reduction = 0 + + iteration += 1 + + if termination_status is None: + termination_status = 0 + + active_mask = np.zeros_like(x) + return OptimizeResult( + x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm, + active_mask=active_mask, nfev=nfev, njev=njev, + status=termination_status) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf_linear.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf_linear.py new file mode 100644 index 0000000000000000000000000000000000000000..dd752763179bcf97945c7f34ce6a9e49e85c819e --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_lsq/trf_linear.py @@ -0,0 +1,249 @@ +"""The adaptation of Trust Region Reflective algorithm for a linear +least-squares problem.""" +import numpy as np +from numpy.linalg import norm +from scipy.linalg import qr, 
solve_triangular +from scipy.sparse.linalg import lsmr +from scipy.optimize import OptimizeResult + +from .givens_elimination import givens_elimination +from .common import ( + EPS, step_size_to_bound, find_active_constraints, in_bounds, + make_strictly_feasible, build_quadratic_1d, evaluate_quadratic, + minimize_quadratic_1d, CL_scaling_vector, reflective_transformation, + print_header_linear, print_iteration_linear, compute_grad, + regularized_lsq_operator, right_multiplied_operator) + + +def regularized_lsq_with_qr(m, n, R, QTb, perm, diag, copy_R=True): + """Solve regularized least squares using information from QR-decomposition. + + The initial problem is to solve the following system in a least-squares + sense:: + + A x = b + D x = 0 + + where D is diagonal matrix. The method is based on QR decomposition + of the form A P = Q R, where P is a column permutation matrix, Q is an + orthogonal matrix and R is an upper triangular matrix. + + Parameters + ---------- + m, n : int + Initial shape of A. + R : ndarray, shape (n, n) + Upper triangular matrix from QR decomposition of A. + QTb : ndarray, shape (n,) + First n components of Q^T b. + perm : ndarray, shape (n,) + Array defining column permutation of A, such that ith column of + P is perm[i]-th column of identity matrix. + diag : ndarray, shape (n,) + Array containing diagonal elements of D. + + Returns + ------- + x : ndarray, shape (n,) + Found least-squares solution. + """ + if copy_R: + R = R.copy() + v = QTb.copy() + + givens_elimination(R, v, diag[perm]) + + abs_diag_R = np.abs(np.diag(R)) + threshold = EPS * max(m, n) * np.max(abs_diag_R) + nns, = np.nonzero(abs_diag_R > threshold) + + R = R[np.ix_(nns, nns)] + v = v[nns] + + x = np.zeros(n) + x[perm[nns]] = solve_triangular(R, v) + + return x + + +def backtracking(A, g, x, p, theta, p_dot_g, lb, ub): + """Find an appropriate step size using backtracking line search.""" + alpha = 1 + while True: + x_new, _ = reflective_transformation(x + alpha * p, lb, ub) + step = x_new - x + cost_change = -evaluate_quadratic(A, g, step) + if cost_change > -0.1 * alpha * p_dot_g: + break + alpha *= 0.5 + + active = find_active_constraints(x_new, lb, ub) + if np.any(active != 0): + x_new, _ = reflective_transformation(x + theta * alpha * p, lb, ub) + x_new = make_strictly_feasible(x_new, lb, ub, rstep=0) + step = x_new - x + cost_change = -evaluate_quadratic(A, g, step) + + return x, step, cost_change + + +def select_step(x, A_h, g_h, c_h, p, p_h, d, lb, ub, theta): + """Select the best step according to Trust Region Reflective algorithm.""" + if in_bounds(x + p, lb, ub): + return p + + p_stride, hits = step_size_to_bound(x, p, lb, ub) + r_h = np.copy(p_h) + r_h[hits.astype(bool)] *= -1 + r = d * r_h + + # Restrict step, such that it hits the bound. + p *= p_stride + p_h *= p_stride + x_on_bound = x + p + + # Find the step size along reflected direction. + r_stride_u, _ = step_size_to_bound(x_on_bound, r, lb, ub) + + # Stay interior. + r_stride_l = (1 - theta) * r_stride_u + r_stride_u *= theta + + if r_stride_u > 0: + a, b, c = build_quadratic_1d(A_h, g_h, r_h, s0=p_h, diag=c_h) + r_stride, r_value = minimize_quadratic_1d( + a, b, r_stride_l, r_stride_u, c=c) + r_h = p_h + r_h * r_stride + r = d * r_h + else: + r_value = np.inf + + # Now correct p_h to make it strictly interior. 
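+ # (Scaling by theta < 1 pulls the bound-hitting step slightly back
+ # into the interior, preserving strict feasibility of the iterates.)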
+ p_h *= theta + p *= theta + p_value = evaluate_quadratic(A_h, g_h, p_h, diag=c_h) + + ag_h = -g_h + ag = d * ag_h + ag_stride_u, _ = step_size_to_bound(x, ag, lb, ub) + ag_stride_u *= theta + a, b = build_quadratic_1d(A_h, g_h, ag_h, diag=c_h) + ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride_u) + ag *= ag_stride + + if p_value < r_value and p_value < ag_value: + return p + elif r_value < p_value and r_value < ag_value: + return r + else: + return ag + + +def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol, + max_iter, verbose, *, lsmr_maxiter=None): + m, n = A.shape + x, _ = reflective_transformation(x_lsq, lb, ub) + x = make_strictly_feasible(x, lb, ub, rstep=0.1) + + if lsq_solver == 'exact': + QT, R, perm = qr(A, mode='economic', pivoting=True) + QT = QT.T + + if m < n: + R = np.vstack((R, np.zeros((n - m, n)))) + + QTr = np.zeros(n) + k = min(m, n) + elif lsq_solver == 'lsmr': + r_aug = np.zeros(m + n) + auto_lsmr_tol = False + if lsmr_tol is None: + lsmr_tol = 1e-2 * tol + elif lsmr_tol == 'auto': + auto_lsmr_tol = True + + r = A.dot(x) - b + g = compute_grad(A, r) + cost = 0.5 * np.dot(r, r) + initial_cost = cost + + termination_status = None + step_norm = None + cost_change = None + + if max_iter is None: + max_iter = 100 + + if verbose == 2: + print_header_linear() + + for iteration in range(max_iter): + v, dv = CL_scaling_vector(x, g, lb, ub) + g_scaled = g * v + g_norm = norm(g_scaled, ord=np.inf) + if g_norm < tol: + termination_status = 1 + + if verbose == 2: + print_iteration_linear(iteration, cost, cost_change, + step_norm, g_norm) + + if termination_status is not None: + break + + diag_h = g * dv + diag_root_h = diag_h ** 0.5 + d = v ** 0.5 + g_h = d * g + + A_h = right_multiplied_operator(A, d) + if lsq_solver == 'exact': + QTr[:k] = QT.dot(r) + p_h = -regularized_lsq_with_qr(m, n, R * d[perm], QTr, perm, + diag_root_h, copy_R=False) + elif lsq_solver == 'lsmr': + lsmr_op = regularized_lsq_operator(A_h, diag_root_h) + r_aug[:m] = r + if auto_lsmr_tol: + eta = 1e-2 * min(0.5, g_norm) + lsmr_tol = max(EPS, min(0.1, eta * g_norm)) + p_h = -lsmr(lsmr_op, r_aug, maxiter=lsmr_maxiter, + atol=lsmr_tol, btol=lsmr_tol)[0] + + p = d * p_h + + p_dot_g = np.dot(p, g) + if p_dot_g > 0: + termination_status = -1 + + theta = 1 - min(0.005, g_norm) + step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta) + cost_change = -evaluate_quadratic(A, g, step) + + # Perhaps almost never executed, the idea is that `p` is descent + # direction thus we must find acceptable cost decrease using simple + # "backtracking", otherwise the algorithm's logic would break. 
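+ # (A negative predicted decrease means the selected step would
+ # increase the quadratic model of the cost, so we fall back to the
+ # backtracking search along p.)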
+ if cost_change < 0: + x, step, cost_change = backtracking( + A, g, x, p, theta, p_dot_g, lb, ub) + else: + x = make_strictly_feasible(x + step, lb, ub, rstep=0) + + step_norm = norm(step) + r = A.dot(x) - b + g = compute_grad(A, r) + + if cost_change < tol * cost: + termination_status = 2 + + cost = 0.5 * np.dot(r, r) + + if termination_status is None: + termination_status = 0 + + active_mask = find_active_constraints(x, lb, ub, rtol=tol) + + return OptimizeResult( + x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask, + nit=iteration + 1, status=termination_status, + initial_cost=initial_cost) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f6964335bb598261ca3fb6d6b9de4d8b2d43a59 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/__init__.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_complex.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_complex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24e54ef2afcb2c5594027446c9fb5cfac2e9d94c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_complex.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_vertex.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_vertex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f0f1449bb4ee99fcf814feb036619686531715c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/__pycache__/_vertex.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_complex.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_complex.py new file mode 100644 index 0000000000000000000000000000000000000000..1e4d1ac1beea5e38b1b89cc9a0d6e1453ddd7174 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_complex.py @@ -0,0 +1,1225 @@ +"""Base classes for low memory simplicial complex structures.""" +import copy +import logging +import itertools +import decimal +from functools import cache + +import numpy as np + +from ._vertex import (VertexCacheField, VertexCacheIndex) + + +class Complex: + """ + Base class for a simplicial complex described as a cache of vertices + 
together with their connections.
+
+ Important methods:
+ Domain triangulation:
+ Complex.triangulate, Complex.split_generation
+ Triangulating arbitrary points (must be triangulable,
+ may exist outside the domain):
+ Complex.triangulate(sample_set)
+ Converting another simplicial complex structure data type to the
+ structure used in Complex (ex. OBJ wavefront)
+ Complex.convert(datatype, data)
+
+ Important objects:
+ HC.V: The cache of vertices and their connection
+ HC.H: Storage structure of all vertex groups
+
+ Parameters
+ ----------
+ dim : int
+ Spatial dimensionality of the complex R^dim
+ domain : list of tuples, optional
+ The bounds [x_l, x_u]^dim of the hyperrectangle space
+ ex. The default domain is the hyperrectangle [0, 1]^dim
+ Note: The domain must be convex; non-convex spaces can be cut
+ away from this domain using the non-linear
+ g_cons functions to define any arbitrary domain
+ (these domains may also be disconnected from each other)
+ sfield :
+ A scalar function defined in the associated domain f: R^dim --> R
+ sfield_args : tuple
+ Additional arguments to be passed to `sfield`
+ vfield :
+ A vector field function defined in the associated domain
+ f: R^dim --> R^m
+ (for example a gradient function of the scalar field)
+ vfield_args : tuple
+ Additional arguments to be passed to vfield
+ symmetry : None or list
+ Specify if the objective function contains symmetric variables.
+ The search space (and therefore performance) is decreased by up to
+ O(n!) times in the fully symmetric case.
+
+ E.g. f(x) = (x_1 + x_2 + x_3) + (x_4)**2 + (x_5)**2 + (x_6)**2
+
+ In this equation x_2 and x_3 are symmetric to x_1, while x_5 and
+ x_6 are symmetric to x_4; this can be specified to the solver as:
+
+ symmetry = [0, # Variable 1
+ 0, # symmetric to variable 1
+ 0, # symmetric to variable 1
+ 3, # Variable 4
+ 3, # symmetric to variable 4
+ 3, # symmetric to variable 4
+ ]
+
+ constraints : dict or sequence of dict, optional
+ Constraints definition.
+ Function(s) ``R**n`` in the form::
+
+ g(x) <= 0 applied as g : R^n -> R^m
+ h(x) == 0 applied as h : R^n -> R^p
+
+ Each constraint is defined in a dictionary with fields:
+
+ type : str
+ Constraint type: 'eq' for equality, 'ineq' for inequality.
+ fun : callable
+ The function defining the constraint.
+ jac : callable, optional
+ The Jacobian of `fun` (unused).
+ args : sequence, optional
+ Extra arguments to be passed to the function and Jacobian.
+
+ Equality constraint means that the constraint function result is to
+ be zero whereas inequality means that it is to be non-negative.
+
+ workers : int, optional
+ Uses `multiprocessing.Pool` to compute the field
+ functions in parallel.
+ """ + def __init__(self, dim, domain=None, sfield=None, sfield_args=(), + symmetry=None, constraints=None, workers=1): + self.dim = dim + + # Domains + self.domain = domain + if domain is None: + self.bounds = [(0.0, 1.0), ] * dim + else: + self.bounds = domain + self.symmetry = symmetry + # here in init to avoid if checks + + # Field functions + self.sfield = sfield + self.sfield_args = sfield_args + + # Process constraints + # Constraints + # Process constraint dict sequence: + if constraints is not None: + self.min_cons = constraints + self.g_cons = [] + self.g_args = [] + if not isinstance(constraints, (tuple, list)): + constraints = (constraints,) + + for cons in constraints: + if cons['type'] in ('ineq'): + self.g_cons.append(cons['fun']) + try: + self.g_args.append(cons['args']) + except KeyError: + self.g_args.append(()) + self.g_cons = tuple(self.g_cons) + self.g_args = tuple(self.g_args) + else: + self.g_cons = None + self.g_args = None + + # Homology properties + self.gen = 0 + self.perm_cycle = 0 + + # Every cell is stored in a list of its generation, + # ex. the initial cell is stored in self.H[0] + # 1st get new cells are stored in self.H[1] etc. + # When a cell is sub-generated it is removed from this list + + self.H = [] # Storage structure of vertex groups + + # Cache of all vertices + if (sfield is not None) or (self.g_cons is not None): + # Initiate a vertex cache and an associated field cache, note that + # the field case is always initiated inside the vertex cache if an + # associated field scalar field is defined: + if sfield is not None: + self.V = VertexCacheField(field=sfield, field_args=sfield_args, + g_cons=self.g_cons, + g_cons_args=self.g_args, + workers=workers) + elif self.g_cons is not None: + self.V = VertexCacheField(field=sfield, field_args=sfield_args, + g_cons=self.g_cons, + g_cons_args=self.g_args, + workers=workers) + else: + self.V = VertexCacheIndex() + + self.V_non_symm = [] # List of non-symmetric vertices + + def __call__(self): + return self.H + + # %% Triangulation methods + def cyclic_product(self, bounds, origin, supremum, centroid=True): + """Generate initial triangulation using cyclic product""" + # Define current hyperrectangle + vot = tuple(origin) + vut = tuple(supremum) # Hyperrectangle supremum + self.V[vot] + vo = self.V[vot] + yield vo.x + self.V[vut].connect(self.V[vot]) + yield vut + # Cyclic group approach with second x_l --- x_u operation. + + # These containers store the "lower" and "upper" vertices + # corresponding to the origin or supremum of every C2 group. + # It has the structure of `dim` times embedded lists each containing + # these vertices as the entire complex grows. Bounds[0] has to be done + # outside the loops before we have symmetric containers. 
+ # NOTE: This means that bounds[0][1] must always exist + C0x = [[self.V[vot]]] + a_vo = copy.copy(list(origin)) + a_vo[0] = vut[0] # Update aN Origin + a_vo = self.V[tuple(a_vo)] + # self.V[vot].connect(self.V[tuple(a_vo)]) + self.V[vot].connect(a_vo) + yield a_vo.x + C1x = [[a_vo]] + # C1x = [[self.V[tuple(a_vo)]]] + ab_C = [] # Container for a + b operations + + # Loop over remaining bounds + for i, x in enumerate(bounds[1:]): + # Update lower and upper containers + C0x.append([]) + C1x.append([]) + # try to access a second bound (if not, C1 is symmetric) + try: + # Early try so that we don't have to copy the cache before + # moving on to next C1/C2: Try to add the operation of a new + # C2 product by accessing the upper bound + x[1] + # Copy lists for iteration + cC0x = [x[:] for x in C0x[:i + 1]] + cC1x = [x[:] for x in C1x[:i + 1]] + for j, (VL, VU) in enumerate(zip(cC0x, cC1x)): + for k, (vl, vu) in enumerate(zip(VL, VU)): + # Build aN vertices for each lower-upper pair in N: + a_vl = list(vl.x) + a_vu = list(vu.x) + a_vl[i + 1] = vut[i + 1] + a_vu[i + 1] = vut[i + 1] + a_vl = self.V[tuple(a_vl)] + + # Connect vertices in N to corresponding vertices + # in aN: + vl.connect(a_vl) + + yield a_vl.x + + a_vu = self.V[tuple(a_vu)] + # Connect vertices in N to corresponding vertices + # in aN: + vu.connect(a_vu) + + # Connect new vertex pair in aN: + a_vl.connect(a_vu) + + # Connect lower pair to upper (triangulation + # operation of a + b (two arbitrary operations): + vl.connect(a_vu) + ab_C.append((vl, a_vu)) + + # Update the containers + C0x[i + 1].append(vl) + C0x[i + 1].append(vu) + C1x[i + 1].append(a_vl) + C1x[i + 1].append(a_vu) + + # Update old containers + C0x[j].append(a_vl) + C1x[j].append(a_vu) + + # Yield new points + yield a_vu.x + + # Try to connect aN lower source of previous a + b + # operation with a aN vertex + ab_Cc = copy.copy(ab_C) + + for vp in ab_Cc: + b_v = list(vp[0].x) + ab_v = list(vp[1].x) + b_v[i + 1] = vut[i + 1] + ab_v[i + 1] = vut[i + 1] + b_v = self.V[tuple(b_v)] # b + vl + ab_v = self.V[tuple(ab_v)] # b + a_vl + # Note o---o is already connected + vp[0].connect(ab_v) # o-s + b_v.connect(ab_v) # s-s + + # Add new list of cross pairs + ab_C.append((vp[0], ab_v)) + ab_C.append((b_v, ab_v)) + + except IndexError: + cC0x = C0x[i] + cC1x = C1x[i] + VL, VU = cC0x, cC1x + for k, (vl, vu) in enumerate(zip(VL, VU)): + # Build aN vertices for each lower-upper pair in N: + a_vu = list(vu.x) + a_vu[i + 1] = vut[i + 1] + # Connect vertices in N to corresponding vertices + # in aN: + a_vu = self.V[tuple(a_vu)] + # Connect vertices in N to corresponding vertices + # in aN: + vu.connect(a_vu) + # Connect new vertex pair in aN: + # a_vl.connect(a_vu) + # Connect lower pair to upper (triangulation + # operation of a + b (two arbitrary operations): + vl.connect(a_vu) + ab_C.append((vl, a_vu)) + C0x[i + 1].append(vu) + C1x[i + 1].append(a_vu) + # Yield new points + a_vu.connect(self.V[vut]) + yield a_vu.x + ab_Cc = copy.copy(ab_C) + for vp in ab_Cc: + if vp[1].x[i] == vut[i]: + ab_v = list(vp[1].x) + ab_v[i + 1] = vut[i + 1] + ab_v = self.V[tuple(ab_v)] # b + a_vl + # Note o---o is already connected + vp[0].connect(ab_v) # o-s + + # Add new list of cross pairs + ab_C.append((vp[0], ab_v)) + + # Clean class trash + try: + del C0x + del cC0x + del C1x + del cC1x + del ab_C + del ab_Cc + except UnboundLocalError: + pass + + # Extra yield to ensure that the triangulation is completed + if centroid: + vo = self.V[vot] + vs = self.V[vut] + # Disconnect the origin and supremum + 
vo.disconnect(vs)
+            # Build centroid
+            vc = self.split_edge(vot, vut)
+            for v in vo.nn:
+                v.connect(vc)
+            yield vc.x
+            return vc.x
+        else:
+            yield vut
+            return vut
+
+    def triangulate(self, n=None, symmetry=None, centroid=True,
+                    printout=False):
+        """
+        Triangulate the initial domain; if `n` is not None, then a limited
+        number of points will be generated.
+
+        Parameters
+        ----------
+        n : int, Number of points to be sampled.
+        symmetry :
+
+            Ex. Dictionary/hashtable
+            f(x) = (x_1 + x_2 + x_3) + (x_4)**2 + (x_5)**2 + (x_6)**2
+
+            symmetry = {symmetry[0]: 0,  # Variable 1
+                        symmetry[1]: 0,  # symmetric to variable 1
+                        symmetry[2]: 0,  # symmetric to variable 1
+                        symmetry[3]: 3,  # Variable 4
+                        symmetry[4]: 3,  # symmetric to variable 4
+                        symmetry[5]: 3,  # symmetric to variable 4
+                        }
+        centroid : bool, if True add a central point to the hypercube
+        printout : bool, if True print out results
+
+        NOTES:
+        ------
+        Rather than using the combinatorial algorithm to connect vertices we
+        make the following observation:
+
+        The bound pairs are similar to a C2 cyclic group and the structure is
+        formed using the Cartesian product:
+
+        H = C2 x C2 x C2 ... x C2 (dim times)
+
+        So construct any normal subgroup N and consider H/N first. We connect
+        all vertices within N (ex. N is C2 in the first dimension), then we
+        move to a left coset aN (an operation moving around the defined H/N
+        group, for example moving from the lower bound in C2 (dimension 2) to
+        the upper bound in C2), connecting all the vertices during this
+        operation. Now repeat the N connections. Note that these elements can
+        be connected in parallel.
+        """
+        # Inherit class arguments
+        if symmetry is None:
+            symmetry = self.symmetry
+        # Build origin and supremum vectors
+        origin = [i[0] for i in self.bounds]
+        self.origin = origin
+        supremum = [i[1] for i in self.bounds]
+
+        self.supremum = supremum
+
+        if symmetry is None:
+            cbounds = self.bounds
+        else:
+            cbounds = copy.copy(self.bounds)
+            for i, j in enumerate(symmetry):
+                if i != j:
+                    # pop second entry on second symmetry vars
+                    cbounds[i] = [self.bounds[symmetry[i]][0]]
+                    # Sole (first) entry is the sup value and there is no
+                    # origin:
+                    cbounds[i] = [self.bounds[symmetry[i]][1]]
+                    if (self.bounds[symmetry[i]] !=
+                            self.bounds[symmetry[j]]):
+                        logging.warning(f"Variable {i} was specified as "
+                                        f"symmetric to variable {j}, however"
+                                        f", the bounds {i} ="
+                                        f" {self.bounds[symmetry[i]]} and {j}"
+                                        f" ="
+                                        f" {self.bounds[symmetry[j]]} do not "
+                                        f"match, the mismatch was ignored in "
+                                        f"the initial triangulation.")
+                        cbounds[i] = self.bounds[symmetry[j]]
+
+        if n is None:
+            # Build generator
+            self.cp = self.cyclic_product(cbounds, origin, supremum, centroid)
+            for i in self.cp:
+                i  # consume the generator to build the triangulation
+
+            try:
+                self.triangulated_vectors.append((tuple(self.origin),
+                                                  tuple(self.supremum)))
+            except (AttributeError, KeyError):
+                self.triangulated_vectors = [(tuple(self.origin),
+                                              tuple(self.supremum))]
+
+        else:
+            # Check if generator already exists
+            try:
+                self.cp
+            except (AttributeError, KeyError):
+                self.cp = self.cyclic_product(cbounds, origin, supremum,
+                                              centroid)
+
+            try:
+                while len(self.V.cache) < n:
+                    next(self.cp)
+            except StopIteration:
+                try:
+                    self.triangulated_vectors.append((tuple(self.origin),
+                                                      tuple(self.supremum)))
+                except (AttributeError, KeyError):
+                    self.triangulated_vectors = [(tuple(self.origin),
+                                                  tuple(self.supremum))]
+
+        if printout:
+            # for v in self.C0():
+            #     v.print_out()
+            for v in self.V.cache:
+                self.V[v].print_out()
+
+        return
+
+    def refine(self, n=1):
+        if n is None:
+            try:
+                self.triangulated_vectors
+                self.refine_all()
+                return
+            except AttributeError as ae:
+                if str(ae) == "'Complex' object has no attribute " \
+                              "'triangulated_vectors'":
+                    self.triangulate(symmetry=self.symmetry)
+                    return
+                else:
+                    raise
+
+        nt = len(self.V.cache) + n  # Target number of total vertices
+        # In the outer while loop we iterate until we have added an extra `n`
+        # vertices to the complex:
+        while len(self.V.cache) < nt:  # while loop 1
+            try:  # try 1
+                # Try to access triangulated_vectors; this should only be
+                # defined if an initial triangulation has already been
+                # performed:
+                self.triangulated_vectors
+                # Try a usual iteration of the current generator; if it
+                # does not exist or is exhausted then produce a new generator
+                try:  # try 2
+                    next(self.rls)
+                except (AttributeError, StopIteration, KeyError):
+                    vp = self.triangulated_vectors[0]
+                    self.rls = self.refine_local_space(*vp, bounds=self.bounds)
+                    next(self.rls)
+
+            except (AttributeError, KeyError):
+                # If an initial triangulation has not been completed, then
+                # we start/continue the initial triangulation targeting `nt`
+                # vertices; if nt is greater than the initial number of
+                # vertices then the `refine` routine will move back to try 1.
+                self.triangulate(nt, self.symmetry)
+        return
+
+    def refine_all(self, centroids=True):
+        """Refine the entire domain of the current complex."""
+        try:
+            self.triangulated_vectors
+            tvs = copy.copy(self.triangulated_vectors)
+            for i, vp in enumerate(tvs):
+                self.rls = self.refine_local_space(*vp, bounds=self.bounds)
+                for i in self.rls:
+                    i  # consume the generator
+        except AttributeError as ae:
+            if str(ae) == "'Complex' object has no attribute " \
+                          "'triangulated_vectors'":
+                self.triangulate(symmetry=self.symmetry, centroid=centroids)
+            else:
+                raise
+
+        # This adds a centroid to every new sub-domain generated and defined
+        # by self.triangulated_vectors, in addition to the vertices, to
+        # complete the triangulation.
+        return
+
+    def refine_local_space(self, origin, supremum, bounds, centroid=1):
+        # Copy for later removal
+        origin_c = copy.copy(origin)
+        supremum_c = copy.copy(supremum)
+
+        # Initiate local variables redefined in later inner `for` loop:
+        vl, vu, a_vu = None, None, None
+
+        # Change the vector orientation so that it is only increasing
+        s_ov = list(origin)
+        s_origin = list(origin)
+        s_sv = list(supremum)
+        s_supremum = list(supremum)
+        for i, vi in enumerate(s_origin):
+            if s_ov[i] > s_sv[i]:
+                s_origin[i] = s_sv[i]
+                s_supremum[i] = s_ov[i]
+
+        vot = tuple(s_origin)
+        vut = tuple(s_supremum)  # Hyperrectangle supremum
+
+        vo = self.V[vot]  # initiate if doesn't exist yet
+        vs = self.V[vut]
+        # Start by finding the old centroid of the new space:
+        vco = self.split_edge(vo.x, vs.x)  # Split in case not centroid arg
+
+        # Find set of extreme vertices in current local space
+        sup_set = copy.copy(vco.nn)
+        # Cyclic group approach with second x_l --- x_u operation.
+
+        # These containers store the "lower" and "upper" vertices
+        # corresponding to the origin or supremum of every C2 group.
+        # It has the structure of `dim` times embedded lists each containing
+        # these vertices as the entire complex grows. Bounds[0] has to be done
+        # outside the loops before we have symmetric containers.
+ # NOTE: This means that bounds[0][1] must always exist + + a_vl = copy.copy(list(vot)) + a_vl[0] = vut[0] # Update aN Origin + if tuple(a_vl) not in self.V.cache: + vo = self.V[vot] # initiate if doesn't exist yet + vs = self.V[vut] + # Start by finding the old centroid of the new space: + vco = self.split_edge(vo.x, vs.x) # Split in case not centroid arg + + # Find set of extreme vertices in current local space + sup_set = copy.copy(vco.nn) + a_vl = copy.copy(list(vot)) + a_vl[0] = vut[0] # Update aN Origin + a_vl = self.V[tuple(a_vl)] + else: + a_vl = self.V[tuple(a_vl)] + + c_v = self.split_edge(vo.x, a_vl.x) + c_v.connect(vco) + yield c_v.x + Cox = [[vo]] + Ccx = [[c_v]] + Cux = [[a_vl]] + ab_C = [] # Container for a + b operations + s_ab_C = [] # Container for symmetric a + b operations + + # Loop over remaining bounds + for i, x in enumerate(bounds[1:]): + # Update lower and upper containers + Cox.append([]) + Ccx.append([]) + Cux.append([]) + # try to access a second bound (if not, C1 is symmetric) + try: + t_a_vl = list(vot) + t_a_vl[i + 1] = vut[i + 1] + + # New: lists are used anyway, so copy all + # %% + # Copy lists for iteration + cCox = [x[:] for x in Cox[:i + 1]] + cCcx = [x[:] for x in Ccx[:i + 1]] + cCux = [x[:] for x in Cux[:i + 1]] + # Try to connect aN lower source of previous a + b + # operation with a aN vertex + ab_Cc = copy.copy(ab_C) # NOTE: We append ab_C in the + # (VL, VC, VU) for-loop, but we use the copy of the list in the + # ab_Cc for-loop. + s_ab_Cc = copy.copy(s_ab_C) + + # Early try so that we don't have to copy the cache before + # moving on to next C1/C2: Try to add the operation of a new + # C2 product by accessing the upper bound + if tuple(t_a_vl) not in self.V.cache: + # Raise error to continue symmetric refine + raise IndexError + t_a_vu = list(vut) + t_a_vu[i + 1] = vut[i + 1] + if tuple(t_a_vu) not in self.V.cache: + # Raise error to continue symmetric refine: + raise IndexError + + for vectors in s_ab_Cc: + # s_ab_C.append([c_vc, vl, vu, a_vu]) + bc_vc = list(vectors[0].x) + b_vl = list(vectors[1].x) + b_vu = list(vectors[2].x) + ba_vu = list(vectors[3].x) + + bc_vc[i + 1] = vut[i + 1] + b_vl[i + 1] = vut[i + 1] + b_vu[i + 1] = vut[i + 1] + ba_vu[i + 1] = vut[i + 1] + + bc_vc = self.V[tuple(bc_vc)] + bc_vc.connect(vco) # NOTE: Unneeded? 
+ yield bc_vc + + # Split to centre, call this centre group "d = 0.5*a" + d_bc_vc = self.split_edge(vectors[0].x, bc_vc.x) + d_bc_vc.connect(bc_vc) + d_bc_vc.connect(vectors[1]) # Connect all to centroid + d_bc_vc.connect(vectors[2]) # Connect all to centroid + d_bc_vc.connect(vectors[3]) # Connect all to centroid + yield d_bc_vc.x + b_vl = self.V[tuple(b_vl)] + bc_vc.connect(b_vl) # Connect aN cross pairs + d_bc_vc.connect(b_vl) # Connect all to centroid + + yield b_vl + b_vu = self.V[tuple(b_vu)] + bc_vc.connect(b_vu) # Connect aN cross pairs + d_bc_vc.connect(b_vu) # Connect all to centroid + + b_vl_c = self.split_edge(b_vu.x, b_vl.x) + bc_vc.connect(b_vl_c) + + yield b_vu + ba_vu = self.V[tuple(ba_vu)] + bc_vc.connect(ba_vu) # Connect aN cross pairs + d_bc_vc.connect(ba_vu) # Connect all to centroid + + # Split the a + b edge of the initial triangulation: + os_v = self.split_edge(vectors[1].x, ba_vu.x) # o-s + ss_v = self.split_edge(b_vl.x, ba_vu.x) # s-s + b_vu_c = self.split_edge(b_vu.x, ba_vu.x) + bc_vc.connect(b_vu_c) + yield os_v.x # often equal to vco, but not always + yield ss_v.x # often equal to bc_vu, but not always + yield ba_vu + # Split remaining to centre, call this centre group + # "d = 0.5*a" + d_bc_vc = self.split_edge(vectors[0].x, bc_vc.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + yield d_bc_vc.x + d_b_vl = self.split_edge(vectors[1].x, b_vl.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_b_vl) # Connect dN cross pairs + yield d_b_vl.x + d_b_vu = self.split_edge(vectors[2].x, b_vu.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_b_vu) # Connect dN cross pairs + yield d_b_vu.x + d_ba_vu = self.split_edge(vectors[3].x, ba_vu.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_ba_vu) # Connect dN cross pairs + yield d_ba_vu + + # comb = [c_vc, vl, vu, a_vl, a_vu, + # bc_vc, b_vl, b_vu, ba_vl, ba_vu] + comb = [vl, vu, a_vu, + b_vl, b_vu, ba_vu] + comb_iter = itertools.combinations(comb, 2) + for vecs in comb_iter: + self.split_edge(vecs[0].x, vecs[1].x) + # Add new list of cross pairs + ab_C.append((d_bc_vc, vectors[1], b_vl, a_vu, ba_vu)) + ab_C.append((d_bc_vc, vl, b_vl, a_vu, ba_vu)) # = prev + + for vectors in ab_Cc: + bc_vc = list(vectors[0].x) + b_vl = list(vectors[1].x) + b_vu = list(vectors[2].x) + ba_vl = list(vectors[3].x) + ba_vu = list(vectors[4].x) + bc_vc[i + 1] = vut[i + 1] + b_vl[i + 1] = vut[i + 1] + b_vu[i + 1] = vut[i + 1] + ba_vl[i + 1] = vut[i + 1] + ba_vu[i + 1] = vut[i + 1] + bc_vc = self.V[tuple(bc_vc)] + bc_vc.connect(vco) # NOTE: Unneeded? 
+ yield bc_vc + + # Split to centre, call this centre group "d = 0.5*a" + d_bc_vc = self.split_edge(vectors[0].x, bc_vc.x) + d_bc_vc.connect(bc_vc) + d_bc_vc.connect(vectors[1]) # Connect all to centroid + d_bc_vc.connect(vectors[2]) # Connect all to centroid + d_bc_vc.connect(vectors[3]) # Connect all to centroid + d_bc_vc.connect(vectors[4]) # Connect all to centroid + yield d_bc_vc.x + b_vl = self.V[tuple(b_vl)] + bc_vc.connect(b_vl) # Connect aN cross pairs + d_bc_vc.connect(b_vl) # Connect all to centroid + yield b_vl + b_vu = self.V[tuple(b_vu)] + bc_vc.connect(b_vu) # Connect aN cross pairs + d_bc_vc.connect(b_vu) # Connect all to centroid + yield b_vu + ba_vl = self.V[tuple(ba_vl)] + bc_vc.connect(ba_vl) # Connect aN cross pairs + d_bc_vc.connect(ba_vl) # Connect all to centroid + self.split_edge(b_vu.x, ba_vl.x) + yield ba_vl + ba_vu = self.V[tuple(ba_vu)] + bc_vc.connect(ba_vu) # Connect aN cross pairs + d_bc_vc.connect(ba_vu) # Connect all to centroid + # Split the a + b edge of the initial triangulation: + os_v = self.split_edge(vectors[1].x, ba_vu.x) # o-s + ss_v = self.split_edge(b_vl.x, ba_vu.x) # s-s + yield os_v.x # often equal to vco, but not always + yield ss_v.x # often equal to bc_vu, but not always + yield ba_vu + # Split remaining to centre, call this centre group + # "d = 0.5*a" + d_bc_vc = self.split_edge(vectors[0].x, bc_vc.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + yield d_bc_vc.x + d_b_vl = self.split_edge(vectors[1].x, b_vl.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_b_vl) # Connect dN cross pairs + yield d_b_vl.x + d_b_vu = self.split_edge(vectors[2].x, b_vu.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_b_vu) # Connect dN cross pairs + yield d_b_vu.x + d_ba_vl = self.split_edge(vectors[3].x, ba_vl.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? + d_bc_vc.connect(d_ba_vl) # Connect dN cross pairs + yield d_ba_vl + d_ba_vu = self.split_edge(vectors[4].x, ba_vu.x) + d_bc_vc.connect(vco) # NOTE: Unneeded? 
+ d_bc_vc.connect(d_ba_vu) # Connect dN cross pairs + yield d_ba_vu + c_vc, vl, vu, a_vl, a_vu = vectors + + comb = [vl, vu, a_vl, a_vu, + b_vl, b_vu, ba_vl, ba_vu] + comb_iter = itertools.combinations(comb, 2) + for vecs in comb_iter: + self.split_edge(vecs[0].x, vecs[1].x) + + # Add new list of cross pairs + ab_C.append((bc_vc, b_vl, b_vu, ba_vl, ba_vu)) + ab_C.append((d_bc_vc, d_b_vl, d_b_vu, d_ba_vl, d_ba_vu)) + ab_C.append((d_bc_vc, vectors[1], b_vl, a_vu, ba_vu)) + ab_C.append((d_bc_vc, vu, b_vu, a_vl, ba_vl)) + + for j, (VL, VC, VU) in enumerate(zip(cCox, cCcx, cCux)): + for k, (vl, vc, vu) in enumerate(zip(VL, VC, VU)): + # Build aN vertices for each lower-upper C3 group in N: + a_vl = list(vl.x) + a_vu = list(vu.x) + a_vl[i + 1] = vut[i + 1] + a_vu[i + 1] = vut[i + 1] + a_vl = self.V[tuple(a_vl)] + a_vu = self.V[tuple(a_vu)] + # Note, build (a + vc) later for consistent yields + # Split the a + b edge of the initial triangulation: + c_vc = self.split_edge(vl.x, a_vu.x) + self.split_edge(vl.x, vu.x) # Equal to vc + # Build cN vertices for each lower-upper C3 group in N: + c_vc.connect(vco) + c_vc.connect(vc) + c_vc.connect(vl) # Connect c + ac operations + c_vc.connect(vu) # Connect c + ac operations + c_vc.connect(a_vl) # Connect c + ac operations + c_vc.connect(a_vu) # Connect c + ac operations + yield c_vc.x + c_vl = self.split_edge(vl.x, a_vl.x) + c_vl.connect(vco) + c_vc.connect(c_vl) # Connect cN group vertices + yield c_vl.x + # yield at end of loop: + c_vu = self.split_edge(vu.x, a_vu.x) + c_vu.connect(vco) + # Connect remaining cN group vertices + c_vc.connect(c_vu) # Connect cN group vertices + yield c_vu.x + + a_vc = self.split_edge(a_vl.x, a_vu.x) # is (a + vc) ? + a_vc.connect(vco) + a_vc.connect(c_vc) + + # Storage for connecting c + ac operations: + ab_C.append((c_vc, vl, vu, a_vl, a_vu)) + + # Update the containers + Cox[i + 1].append(vl) + Cox[i + 1].append(vc) + Cox[i + 1].append(vu) + Ccx[i + 1].append(c_vl) + Ccx[i + 1].append(c_vc) + Ccx[i + 1].append(c_vu) + Cux[i + 1].append(a_vl) + Cux[i + 1].append(a_vc) + Cux[i + 1].append(a_vu) + + # Update old containers + Cox[j].append(c_vl) # ! + Cox[j].append(a_vl) + Ccx[j].append(c_vc) # ! + Ccx[j].append(a_vc) # ! + Cux[j].append(c_vu) # ! 
+ Cux[j].append(a_vu) + + # Yield new points + yield a_vc.x + + except IndexError: + for vectors in ab_Cc: + ba_vl = list(vectors[3].x) + ba_vu = list(vectors[4].x) + ba_vl[i + 1] = vut[i + 1] + ba_vu[i + 1] = vut[i + 1] + ba_vu = self.V[tuple(ba_vu)] + yield ba_vu + d_bc_vc = self.split_edge(vectors[1].x, ba_vu.x) # o-s + yield ba_vu + d_bc_vc.connect(vectors[1]) # Connect all to centroid + d_bc_vc.connect(vectors[2]) # Connect all to centroid + d_bc_vc.connect(vectors[3]) # Connect all to centroid + d_bc_vc.connect(vectors[4]) # Connect all to centroid + yield d_bc_vc.x + ba_vl = self.V[tuple(ba_vl)] + yield ba_vl + d_ba_vl = self.split_edge(vectors[3].x, ba_vl.x) + d_ba_vu = self.split_edge(vectors[4].x, ba_vu.x) + d_ba_vc = self.split_edge(d_ba_vl.x, d_ba_vu.x) + yield d_ba_vl + yield d_ba_vu + yield d_ba_vc + c_vc, vl, vu, a_vl, a_vu = vectors + comb = [vl, vu, a_vl, a_vu, + ba_vl, + ba_vu] + comb_iter = itertools.combinations(comb, 2) + for vecs in comb_iter: + self.split_edge(vecs[0].x, vecs[1].x) + + # Copy lists for iteration + cCox = Cox[i] + cCcx = Ccx[i] + cCux = Cux[i] + VL, VC, VU = cCox, cCcx, cCux + for k, (vl, vc, vu) in enumerate(zip(VL, VC, VU)): + # Build aN vertices for each lower-upper pair in N: + a_vu = list(vu.x) + a_vu[i + 1] = vut[i + 1] + + # Connect vertices in N to corresponding vertices + # in aN: + a_vu = self.V[tuple(a_vu)] + yield a_vl.x + # Split the a + b edge of the initial triangulation: + c_vc = self.split_edge(vl.x, a_vu.x) + self.split_edge(vl.x, vu.x) # Equal to vc + c_vc.connect(vco) + c_vc.connect(vc) + c_vc.connect(vl) # Connect c + ac operations + c_vc.connect(vu) # Connect c + ac operations + c_vc.connect(a_vu) # Connect c + ac operations + yield (c_vc.x) + c_vu = self.split_edge(vu.x, + a_vu.x) # yield at end of loop + c_vu.connect(vco) + # Connect remaining cN group vertices + c_vc.connect(c_vu) # Connect cN group vertices + yield (c_vu.x) + + # Update the containers + Cox[i + 1].append(vu) + Ccx[i + 1].append(c_vu) + Cux[i + 1].append(a_vu) + + # Update old containers + s_ab_C.append([c_vc, vl, vu, a_vu]) + + yield a_vu.x + + # Clean class trash + try: + del Cox + del Ccx + del Cux + del ab_C + del ab_Cc + except UnboundLocalError: + pass + + try: + self.triangulated_vectors.remove((tuple(origin_c), + tuple(supremum_c))) + except ValueError: + # Turn this into a logging warning? + pass + # Add newly triangulated vectors: + for vs in sup_set: + self.triangulated_vectors.append((tuple(vco.x), tuple(vs.x))) + + # Extra yield to ensure that the triangulation is completed + if centroid: + vcn_set = set() + c_nn_lists = [] + for vs in sup_set: + # Build centroid + c_nn = self.vpool(vco.x, vs.x) + try: + c_nn.remove(vcn_set) + except KeyError: + pass + c_nn_lists.append(c_nn) + + for c_nn in c_nn_lists: + try: + c_nn.remove(vcn_set) + except KeyError: + pass + + for vs, c_nn in zip(sup_set, c_nn_lists): + # Build centroid + vcn = self.split_edge(vco.x, vs.x) + vcn_set.add(vcn) + try: # Shouldn't be needed? 
+                    c_nn.remove(vcn_set)
+                except KeyError:
+                    pass
+                for vnn in c_nn:
+                    vcn.connect(vnn)
+                yield vcn.x
+        else:
+            pass
+
+        yield vut
+        return
+
+    def refine_star(self, v):
+        """Refine the star domain of a vertex `v`."""
+        # Copy lists before iteration
+        vnn = copy.copy(v.nn)
+        v1nn = []
+        d_v0v1_set = set()
+        for v1 in vnn:
+            v1nn.append(copy.copy(v1.nn))
+
+        for v1, v1nn in zip(vnn, v1nn):
+            vnnu = v1nn.intersection(vnn)
+
+            d_v0v1 = self.split_edge(v.x, v1.x)
+            for o_d_v0v1 in d_v0v1_set:
+                d_v0v1.connect(o_d_v0v1)
+            d_v0v1_set.add(d_v0v1)
+            for v2 in vnnu:
+                d_v1v2 = self.split_edge(v1.x, v2.x)
+                d_v0v1.connect(d_v1v2)
+        return
+
+    @cache
+    def split_edge(self, v1, v2):
+        v1 = self.V[v1]
+        v2 = self.V[v2]
+        # Destroy original edge, if it exists:
+        v1.disconnect(v2)
+        # Compute vertex on centre of edge:
+        try:
+            vct = (v2.x_a - v1.x_a) / 2.0 + v1.x_a
+        except TypeError:  # Allow for decimal operations
+            vct = (v2.x_a - v1.x_a) / decimal.Decimal(2.0) + v1.x_a
+
+        vc = self.V[tuple(vct)]
+        # Connect the original 2 vertices to the new centre vertex:
+        vc.connect(v1)
+        vc.connect(v2)
+        return vc
+
+    def vpool(self, origin, supremum):
+        vot = tuple(origin)
+        vst = tuple(supremum)
+        # Initiate vertices in case they don't exist
+        vo = self.V[vot]
+        vs = self.V[vst]
+
+        # Remove origin - supremum disconnect
+
+        # Find the lower/upper bounds of the refinement hyperrectangle
+        bl = list(vot)
+        bu = list(vst)
+        for i, (voi, vsi) in enumerate(zip(vot, vst)):
+            if bl[i] > vsi:
+                bl[i] = vsi
+            if bu[i] < voi:
+                bu[i] = voi
+
+        # NOTE: This is mostly done with sets/lists because we aren't sure
+        #       how well the numpy arrays will scale to thousands of
+        #       dimensions.
+        vn_pool = set()
+        vn_pool.update(vo.nn)
+        vn_pool.update(vs.nn)
+        cvn_pool = copy.copy(vn_pool)
+        for vn in cvn_pool:
+            for i, xi in enumerate(vn.x):
+                if bl[i] <= xi <= bu[i]:
+                    pass
+                else:
+                    try:
+                        vn_pool.remove(vn)
+                    except KeyError:
+                        pass  # NOTE: Not all neighbours are in initial pool
+        return vn_pool
+
+    def vf_to_vv(self, vertices, simplices):
+        """
+        Convert a vertex-face mesh to the vertex-vertex mesh used by this
+        class.
+
+        Parameters
+        ----------
+        vertices : list
+            Vertices
+        simplices : list
+            Simplices
+        """
+        if self.dim > 1:
+            for s in simplices:
+                edges = itertools.combinations(s, self.dim)
+                for e in edges:
+                    self.V[tuple(vertices[e[0]])].connect(
+                        self.V[tuple(vertices[e[1]])])
+        else:
+            for e in simplices:
+                self.V[tuple(vertices[e[0]])].connect(
+                    self.V[tuple(vertices[e[1]])])
+        return
+
+    def connect_vertex_non_symm(self, v_x, near=None):
+        """
+        Adds a vertex at coords `v_x` to the complex that is not symmetric to
+        the initial triangulation and sub-triangulation.
+
+        If `near` is specified (for example, a star domain or collection of
+        cells known to contain `v`) then only those simplices contained in
+        `near` will be searched; this greatly speeds up the process.
+
+        If `near` is not specified this method will search the entire
+        simplicial complex structure.
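+
+        Illustrative call (hypothetical 2-D complex ``HC``)::
+
+            >>> HC.connect_vertex_non_symm((0.3, 0.7))
+            True
+
+        A return value of ``True`` means a containing simplex was found and
+        the new vertex was connected to its vertices.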
+
+        Parameters
+        ----------
+        v_x : tuple
+            Coordinates of the non-symmetric vertex
+        near : set or list
+            List of vertices; these are points near `v` to check for
+        """
+        if near is None:
+            star = self.V
+        else:
+            star = near
+        # Create the vertex origin
+        if tuple(v_x) in self.V.cache:
+            if self.V[v_x] in self.V_non_symm:
+                pass
+            else:
+                return
+
+        self.V[v_x]
+        found_nn = False
+        S_rows = []
+        for v in star:
+            S_rows.append(v.x)
+
+        S_rows = np.array(S_rows)
+        A = np.array(S_rows) - np.array(v_x)
+        # Iterate through all the possible simplices of S_rows
+        for s_i in itertools.combinations(range(S_rows.shape[0]),
+                                          r=self.dim + 1):
+            # Check if connected, else s_i is not a simplex
+            valid_simplex = True
+            for i in itertools.combinations(s_i, r=2):
+                # Every combination of vertices must be connected. We check
+                # whether the current combination of vertices in s_i is
+                # connected, and break the loop if it is not.
+                if ((self.V[tuple(S_rows[i[1]])] not in
+                        self.V[tuple(S_rows[i[0]])].nn)
+                    and (self.V[tuple(S_rows[i[0]])] not in
+                         self.V[tuple(S_rows[i[1]])].nn)):
+                    valid_simplex = False
+                    break
+
+            S = S_rows[tuple([s_i])]
+            if valid_simplex:
+                if self.deg_simplex(S, proj=None):
+                    valid_simplex = False
+
+            # If s_i is a valid simplex we can test if v_x is inside si
+            if valid_simplex:
+                # Find the A_j0 value from the precalculated values
+                A_j0 = A[tuple([s_i])]
+                if self.in_simplex(S, v_x, A_j0):
+                    found_nn = True
+                    # breaks the main for loop, s_i is the target simplex:
+                    break
+
+        # Connect the simplex to the point
+        if found_nn:
+            for i in s_i:
+                self.V[v_x].connect(self.V[tuple(S_rows[i])])
+        # Attach the simplex to storage for all non-symmetric vertices
+        self.V_non_symm.append(self.V[v_x])
+        # This bool value indicates a successful connection if True:
+        return found_nn
+
+    def in_simplex(self, S, v_x, A_j0=None):
+        """Check if a vector `v_x` is in simplex `S`.
+
+        Parameters
+        ----------
+        S : array_like
+            Array containing simplex entries of vertices as rows
+        v_x :
+            A candidate vertex
+        A_j0 : array, optional
+            Allows for A_j0 to be pre-calculated
+
+        Returns
+        -------
+        res : boolean
+            True if `v_x` is in `S`
+        """
+        A_11 = np.delete(S, 0, 0) - S[0]
+
+        sign_det_A_11 = np.sign(np.linalg.det(A_11))
+        if sign_det_A_11 == 0:
+            # NOTE: We keep the variable A_11, but we loop through A_jj
+            # ind=
+            # while sign_det_A_11 == 0:
+            #     A_11 = np.delete(S, ind, 0) - S[ind]
+            #     sign_det_A_11 = np.sign(np.linalg.det(A_11))
+
+            sign_det_A_11 = -1  # TODO: Choose another det of j instead?
+            # TODO: Unlikely to work in many cases
+
+        if A_j0 is None:
+            A_j0 = S - v_x
+
+        for d in range(self.dim + 1):
+            det_A_jj = (-1)**d * sign_det_A_11
+            # TODO: Note that scipy might be faster to add as an optional
+            #       dependency
+            sign_det_A_j0 = np.sign(np.linalg.det(np.delete(A_j0, d, 0)))
+            # TODO: Note that if sign_det_A_j0 == 0 then the point is coplanar
+            #       to the current simplex facet, so perhaps return True and
+            #       attach?
+            if det_A_jj == sign_det_A_j0:
+                continue
+            else:
+                return False
+
+        return True
+
+    def deg_simplex(self, S, proj=None):
+        """Test a simplex S for degeneracy (linear dependence in R^dim).
+
+        Parameters
+        ----------
+        S : np.array
+            Simplex with rows as vertex vectors
+        proj : array, optional
+            If the projection S[1:] - S[0] is already
+            computed it can be added as an optional argument.
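+
+        For example (illustrative), in R^2 the "simplex"
+        ``S = [[0, 0], [1, 1], [2, 2]]`` is degenerate: its rows are
+        collinear, ``proj = S[1:] - S[0] = [[1, 1], [2, 2]]`` and
+        ``np.linalg.det(proj) == 0``.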
+ """ + # Strategy: we test all combination of faces, if any of the + # determinants are zero then the vectors lie on the same face and is + # therefore linearly dependent in the space of R^dim + if proj is None: + proj = S[1:] - S[0] + + # TODO: Is checking the projection of one vertex against faces of other + # vertices sufficient? Or do we need to check more vertices in + # dimensions higher than 2? + # TODO: Literature seems to suggest using proj.T, but why is this + # needed? + if np.linalg.det(proj) == 0.0: # TODO: Repalace with tolerance? + return True # Simplex is degenerate + else: + return False # Simplex is not degenerate diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_vertex.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_vertex.py new file mode 100644 index 0000000000000000000000000000000000000000..e47558ee7b9a181638841c34bb63603b5d37e221 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_shgo_lib/_vertex.py @@ -0,0 +1,460 @@ +import collections +from abc import ABC, abstractmethod + +import numpy as np + +from scipy._lib._util import MapWrapper + + +class VertexBase(ABC): + """ + Base class for a vertex. + """ + def __init__(self, x, nn=None, index=None): + """ + Initiation of a vertex object. + + Parameters + ---------- + x : tuple or vector + The geometric location (domain). + nn : list, optional + Nearest neighbour list. + index : int, optional + Index of vertex. + """ + self.x = x + self.hash = hash(self.x) # Save precomputed hash + + if nn is not None: + self.nn = set(nn) # can use .indexupdate to add a new list + else: + self.nn = set() + + self.index = index + + def __hash__(self): + return self.hash + + def __getattr__(self, item): + if item not in ['x_a']: + raise AttributeError(f"{type(self)} object has no attribute " + f"'{item}'") + if item == 'x_a': + self.x_a = np.array(self.x) + return self.x_a + + @abstractmethod + def connect(self, v): + raise NotImplementedError("This method is only implemented with an " + "associated child of the base class.") + + @abstractmethod + def disconnect(self, v): + raise NotImplementedError("This method is only implemented with an " + "associated child of the base class.") + + def star(self): + """Returns the star domain ``st(v)`` of the vertex. 
+
+        Parameters
+        ----------
+        v :
+            The vertex ``v`` in ``st(v)``
+
+        Returns
+        -------
+        st : set
+            A set containing all the vertices in ``st(v)``
+        """
+        self.st = set(self.nn)  # copy so that `nn` itself is not mutated
+        self.st.add(self)
+        return self.st
+
+
+class VertexScalarField(VertexBase):
+    """
+    Add homology properties of a scalar field f: R^n --> R associated with
+    the geometry built from the VertexBase class
+    """
+
+    def __init__(self, x, field=None, nn=None, index=None, field_args=(),
+                 g_cons=None, g_cons_args=()):
+        """
+        Parameters
+        ----------
+        x : tuple,
+            vector of vertex coordinates
+        field : callable, optional
+            a scalar field f: R^n --> R associated with the geometry
+        nn : list, optional
+            list of nearest neighbours
+        index : int, optional
+            index of the vertex
+        field_args : tuple, optional
+            additional arguments to be passed to field
+        g_cons : callable, optional
+            constraints on the vertex
+        g_cons_args : tuple, optional
+            additional arguments to be passed to g_cons
+
+        """
+        super().__init__(x, nn=nn, index=index)
+
+        # Note: a Vertex is only initiated once for each x, so the field is
+        # only evaluated once
+        # self.feasible = None
+
+        # self.f is externally defined by the cache to allow parallel
+        # processing
+        # None type that will break arithmetic operations unless defined
+        # self.f = None
+
+        self.check_min = True
+        self.check_max = True
+
+    def connect(self, v):
+        """Connects self to another vertex object v.
+
+        Parameters
+        ----------
+        v : VertexBase or VertexScalarField object
+        """
+        if v is not self and v not in self.nn:
+            self.nn.add(v)
+            v.nn.add(self)
+
+            # Flags for checking homology properties:
+            self.check_min = True
+            self.check_max = True
+            v.check_min = True
+            v.check_max = True
+
+    def disconnect(self, v):
+        if v in self.nn:
+            self.nn.remove(v)
+            v.nn.remove(self)
+
+            # Flags for checking homology properties:
+            self.check_min = True
+            self.check_max = True
+            v.check_min = True
+            v.check_max = True
+
+    def minimiser(self):
+        """Check whether this vertex is strictly less than all its
+           neighbours"""
+        if self.check_min:
+            self._min = all(self.f < v.f for v in self.nn)
+            self.check_min = False
+
+        return self._min
+
+    def maximiser(self):
+        """
+        Check whether this vertex is strictly greater than all its
+        neighbours.
+        """
+        if self.check_max:
+            self._max = all(self.f > v.f for v in self.nn)
+            self.check_max = False
+
+        return self._max
+
+
+class VertexVectorField(VertexBase):
+    """
+    Add homology properties of a vector field f: R^n --> R^m associated with
+    the geometry built from the VertexBase class.
+ """ + + def __init__(self, x, sfield=None, vfield=None, field_args=(), + vfield_args=(), g_cons=None, + g_cons_args=(), nn=None, index=None): + super().__init__(x, nn=nn, index=index) + + raise NotImplementedError("This class is still a work in progress") + + +class VertexCacheBase: + """Base class for a vertex cache for a simplicial complex.""" + def __init__(self): + + self.cache = collections.OrderedDict() + self.nfev = 0 # Feasible points + self.index = -1 + + def __iter__(self): + for v in self.cache: + yield self.cache[v] + return + + def size(self): + """Returns the size of the vertex cache.""" + return self.index + 1 + + def print_out(self): + headlen = len(f"Vertex cache of size: {len(self.cache)}:") + print('=' * headlen) + print(f"Vertex cache of size: {len(self.cache)}:") + print('=' * headlen) + for v in self.cache: + self.cache[v].print_out() + + +class VertexCube(VertexBase): + """Vertex class to be used for a pure simplicial complex with no associated + differential geometry (single level domain that exists in R^n)""" + def __init__(self, x, nn=None, index=None): + super().__init__(x, nn=nn, index=index) + + def connect(self, v): + if v is not self and v not in self.nn: + self.nn.add(v) + v.nn.add(self) + + def disconnect(self, v): + if v in self.nn: + self.nn.remove(v) + v.nn.remove(self) + + +class VertexCacheIndex(VertexCacheBase): + def __init__(self): + """ + Class for a vertex cache for a simplicial complex without an associated + field. Useful only for building and visualising a domain complex. + + Parameters + ---------- + """ + super().__init__() + self.Vertex = VertexCube + + def __getitem__(self, x, nn=None): + try: + return self.cache[x] + except KeyError: + self.index += 1 + xval = self.Vertex(x, index=self.index) + # logging.info("New generated vertex at x = {}".format(x)) + # NOTE: Surprisingly high performance increase if logging + # is commented out + self.cache[x] = xval + return self.cache[x] + + +class VertexCacheField(VertexCacheBase): + def __init__(self, field=None, field_args=(), g_cons=None, g_cons_args=(), + workers=1): + """ + Class for a vertex cache for a simplicial complex with an associated + field. + + Parameters + ---------- + field : callable + Scalar or vector field callable. + field_args : tuple, optional + Any additional fixed parameters needed to completely specify the + field function + g_cons : dict or sequence of dict, optional + Constraints definition. + Function(s) ``R**n`` in the form:: + g_cons_args : tuple, optional + Any additional fixed parameters needed to completely specify the + constraint functions + workers : int optional + Uses `multiprocessing.Pool `) to compute the field + functions in parallel. 
+ + """ + super().__init__() + self.index = -1 + self.Vertex = VertexScalarField + self.field = field + self.field_args = field_args + self.wfield = FieldWrapper(field, field_args) # if workers is not 1 + + self.g_cons = g_cons + self.g_cons_args = g_cons_args + self.wgcons = ConstraintWrapper(g_cons, g_cons_args) + self.gpool = set() # A set of tuples to process for feasibility + + # Field processing objects + self.fpool = set() # A set of tuples to process for scalar function + self.sfc_lock = False # True if self.fpool is non-Empty + + self.workers = workers + self._mapwrapper = MapWrapper(workers) + + if workers == 1: + self.process_gpool = self.proc_gpool + if g_cons is None: + self.process_fpool = self.proc_fpool_nog + else: + self.process_fpool = self.proc_fpool_g + else: + self.process_gpool = self.pproc_gpool + if g_cons is None: + self.process_fpool = self.pproc_fpool_nog + else: + self.process_fpool = self.pproc_fpool_g + + def __getitem__(self, x, nn=None): + try: + return self.cache[x] + except KeyError: + self.index += 1 + xval = self.Vertex(x, field=self.field, nn=nn, index=self.index, + field_args=self.field_args, + g_cons=self.g_cons, + g_cons_args=self.g_cons_args) + + self.cache[x] = xval # Define in cache + self.gpool.add(xval) # Add to pool for processing feasibility + self.fpool.add(xval) # Add to pool for processing field values + return self.cache[x] + + def __getstate__(self): + self_dict = self.__dict__.copy() + del self_dict['pool'] + return self_dict + + def process_pools(self): + if self.g_cons is not None: + self.process_gpool() + self.process_fpool() + self.proc_minimisers() + + def feasibility_check(self, v): + v.feasible = True + for g, args in zip(self.g_cons, self.g_cons_args): + # constraint may return more than 1 value. + if np.any(g(v.x_a, *args) < 0.0): + v.f = np.inf + v.feasible = False + break + + def compute_sfield(self, v): + """Compute the scalar field values of a vertex object `v`. + + Parameters + ---------- + v : VertexBase or VertexScalarField object + """ + try: + v.f = self.field(v.x_a, *self.field_args) + self.nfev += 1 + except AttributeError: + v.f = np.inf + # logging.warning(f"Field function not found at x = {self.x_a}") + if np.isnan(v.f): + v.f = np.inf + + def proc_gpool(self): + """Process all constraints.""" + if self.g_cons is not None: + for v in self.gpool: + self.feasibility_check(v) + # Clean the pool + self.gpool = set() + + def pproc_gpool(self): + """Process all constraints in parallel.""" + gpool_l = [] + for v in self.gpool: + gpool_l.append(v.x_a) + + G = self._mapwrapper(self.wgcons.gcons, gpool_l) + for v, g in zip(self.gpool, G): + v.feasible = g # set vertex object attribute v.feasible = g (bool) + + def proc_fpool_g(self): + """Process all field functions with constraints supplied.""" + for v in self.fpool: + if v.feasible: + self.compute_sfield(v) + # Clean the pool + self.fpool = set() + + def proc_fpool_nog(self): + """Process all field functions with no constraints supplied.""" + for v in self.fpool: + self.compute_sfield(v) + # Clean the pool + self.fpool = set() + + def pproc_fpool_g(self): + """ + Process all field functions with constraints supplied in parallel. 
+ """ + self.wfield.func + fpool_l = [] + for v in self.fpool: + if v.feasible: + fpool_l.append(v.x_a) + else: + v.f = np.inf + F = self._mapwrapper(self.wfield.func, fpool_l) + for va, f in zip(fpool_l, F): + vt = tuple(va) + self[vt].f = f # set vertex object attribute v.f = f + self.nfev += 1 + # Clean the pool + self.fpool = set() + + def pproc_fpool_nog(self): + """ + Process all field functions with no constraints supplied in parallel. + """ + self.wfield.func + fpool_l = [] + for v in self.fpool: + fpool_l.append(v.x_a) + F = self._mapwrapper(self.wfield.func, fpool_l) + for va, f in zip(fpool_l, F): + vt = tuple(va) + self[vt].f = f # set vertex object attribute v.f = f + self.nfev += 1 + # Clean the pool + self.fpool = set() + + def proc_minimisers(self): + """Check for minimisers.""" + for v in self: + v.minimiser() + v.maximiser() + + +class ConstraintWrapper: + """Object to wrap constraints to pass to `multiprocessing.Pool`.""" + def __init__(self, g_cons, g_cons_args): + self.g_cons = g_cons + self.g_cons_args = g_cons_args + + def gcons(self, v_x_a): + vfeasible = True + for g, args in zip(self.g_cons, self.g_cons_args): + # constraint may return more than 1 value. + if np.any(g(v_x_a, *args) < 0.0): + vfeasible = False + break + return vfeasible + + +class FieldWrapper: + """Object to wrap field to pass to `multiprocessing.Pool`.""" + def __init__(self, field, field_args): + self.field = field + self.field_args = field_args + + def func(self, v_x_a): + try: + v_f = self.field(v_x_a, *self.field_args) + except Exception: + v_f = np.inf + if np.isnan(v_f): + v_f = np.inf + + return v_f diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..549cfb9760dda474cb858b7b36d236af48111067 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__init__.py @@ -0,0 +1,6 @@ +"""This module contains the equality constrained SQP solver.""" + + +from .minimize_trustregion_constr import _minimize_trustregion_constr + +__all__ = ['_minimize_trustregion_constr'] diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64912268855dcd699e82c848c028ea953b6db3de Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/__init__.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/canonical_constraint.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/canonical_constraint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcce9fb6d07e10a67a54a9a0ead228da6c0ec631 Binary files /dev/null and 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/canonical_constraint.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/equality_constrained_sqp.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/equality_constrained_sqp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c7b27facdd4cfe50a42b76283a95e5b7054823 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/equality_constrained_sqp.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/minimize_trustregion_constr.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/minimize_trustregion_constr.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30d0fc2615e9856bd99745c2a52bd09ed57e843f Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/minimize_trustregion_constr.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/projections.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/projections.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b813ad3e9a6f7c3950b4b018855db13d034af0c3 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/projections.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/qp_subproblem.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/qp_subproblem.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf809220861d98cc42e0978b03044aa740fa5acd Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/qp_subproblem.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/report.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/report.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd13a59db07f5991ddae7ff78daeca9e9bed6f64 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/report.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/tr_interior_point.cpython-310.pyc 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/tr_interior_point.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..124ab835e9f2338b1fda49326d52ce0807687963
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/__pycache__/tr_interior_point.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/canonical_constraint.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/canonical_constraint.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1ad583bb8eee524d35c2e5bb16934f78629cd69
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/canonical_constraint.py
@@ -0,0 +1,390 @@
+import numpy as np
+import scipy.sparse as sps
+
+
+class CanonicalConstraint:
+    """Canonical constraint to use with trust-constr algorithm.
+
+    It represents the set of constraints of the form::
+
+        f_eq(x) = 0
+        f_ineq(x) <= 0
+
+    where ``f_eq`` and ``f_ineq`` are evaluated by a single function, see
+    below.
+
+    The class is supposed to be instantiated by factory methods, which
+    should prepare the parameters listed below.
+
+    Parameters
+    ----------
+    n_eq, n_ineq : int
+        Number of equality and inequality constraints respectively.
+    fun : callable
+        Function defining the constraints. The signature is
+        ``fun(x) -> c_eq, c_ineq``, where ``c_eq`` is ndarray with `n_eq`
+        components and ``c_ineq`` is ndarray with `n_ineq` components.
+    jac : callable
+        Function to evaluate the Jacobian of the constraint. The signature
+        is ``jac(x) -> J_eq, J_ineq``, where ``J_eq`` and ``J_ineq`` are
+        either ndarray or csr_matrix of shapes (n_eq, n) and (n_ineq, n),
+        respectively.
+    hess : callable
+        Function to evaluate the Hessian of the constraints multiplied
+        by Lagrange multipliers, that is
+        ``dot(f_eq, v_eq) + dot(f_ineq, v_ineq)``. The signature is
+        ``hess(x, v_eq, v_ineq) -> H``, where ``H`` has an implied
+        shape (n, n) and provides a matrix-vector product operation
+        ``H.dot(p)``.
+    keep_feasible : ndarray, shape (n_ineq,)
+        Mask indicating which inequality constraints should be kept feasible.
+    """
+    def __init__(self, n_eq, n_ineq, fun, jac, hess, keep_feasible):
+        self.n_eq = n_eq
+        self.n_ineq = n_ineq
+        self.fun = fun
+        self.jac = jac
+        self.hess = hess
+        self.keep_feasible = keep_feasible
+
+    @classmethod
+    def from_PreparedConstraint(cls, constraint):
+        """Create an instance from a `PreparedConstraint` object."""
+        lb, ub = constraint.bounds
+        cfun = constraint.fun
+        keep_feasible = constraint.keep_feasible
+
+        if np.all(lb == -np.inf) and np.all(ub == np.inf):
+            return cls.empty(cfun.n)
+        elif np.all(lb == ub):
+            return cls._equal_to_canonical(cfun, lb)
+        elif np.all(lb == -np.inf):
+            return cls._less_to_canonical(cfun, ub, keep_feasible)
+        elif np.all(ub == np.inf):
+            return cls._greater_to_canonical(cfun, lb, keep_feasible)
+        else:
+            return cls._interval_to_canonical(cfun, lb, ub, keep_feasible)
+
+    @classmethod
+    def empty(cls, n):
+        """Create an "empty" instance.
+
+        This "empty" instance is required to allow working with unconstrained
+        problems as if they have some constraints.
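+
+        For example (illustrative), ``CanonicalConstraint.empty(2)`` has
+        ``n_eq == n_ineq == 0``; its ``fun`` and ``jac`` return empty
+        arrays, so an unconstrained problem in R^2 can be handled by the
+        same code paths as a constrained one.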
+ """ + empty_fun = np.empty(0) + empty_jac = np.empty((0, n)) + empty_hess = sps.csr_matrix((n, n)) + + def fun(x): + return empty_fun, empty_fun + + def jac(x): + return empty_jac, empty_jac + + def hess(x, v_eq, v_ineq): + return empty_hess + + return cls(0, 0, fun, jac, hess, np.empty(0, dtype=np.bool_)) + + @classmethod + def concatenate(cls, canonical_constraints, sparse_jacobian): + """Concatenate multiple `CanonicalConstraint` into one. + + `sparse_jacobian` (bool) determines the Jacobian format of the + concatenated constraint. Note that items in `canonical_constraints` + must have their Jacobians in the same format. + """ + def fun(x): + if canonical_constraints: + eq_all, ineq_all = zip( + *[c.fun(x) for c in canonical_constraints]) + else: + eq_all, ineq_all = [], [] + + return np.hstack(eq_all), np.hstack(ineq_all) + + if sparse_jacobian: + vstack = sps.vstack + else: + vstack = np.vstack + + def jac(x): + if canonical_constraints: + eq_all, ineq_all = zip( + *[c.jac(x) for c in canonical_constraints]) + else: + eq_all, ineq_all = [], [] + + return vstack(eq_all), vstack(ineq_all) + + def hess(x, v_eq, v_ineq): + hess_all = [] + index_eq = 0 + index_ineq = 0 + for c in canonical_constraints: + vc_eq = v_eq[index_eq:index_eq + c.n_eq] + vc_ineq = v_ineq[index_ineq:index_ineq + c.n_ineq] + hess_all.append(c.hess(x, vc_eq, vc_ineq)) + index_eq += c.n_eq + index_ineq += c.n_ineq + + def matvec(p): + result = np.zeros_like(p) + for h in hess_all: + result += h.dot(p) + return result + + n = x.shape[0] + return sps.linalg.LinearOperator((n, n), matvec, dtype=float) + + n_eq = sum(c.n_eq for c in canonical_constraints) + n_ineq = sum(c.n_ineq for c in canonical_constraints) + keep_feasible = np.hstack([c.keep_feasible for c in + canonical_constraints]) + + return cls(n_eq, n_ineq, fun, jac, hess, keep_feasible) + + @classmethod + def _equal_to_canonical(cls, cfun, value): + empty_fun = np.empty(0) + n = cfun.n + + n_eq = value.shape[0] + n_ineq = 0 + keep_feasible = np.empty(0, dtype=bool) + + if cfun.sparse_jacobian: + empty_jac = sps.csr_matrix((0, n)) + else: + empty_jac = np.empty((0, n)) + + def fun(x): + return cfun.fun(x) - value, empty_fun + + def jac(x): + return cfun.jac(x), empty_jac + + def hess(x, v_eq, v_ineq): + return cfun.hess(x, v_eq) + + empty_fun = np.empty(0) + n = cfun.n + if cfun.sparse_jacobian: + empty_jac = sps.csr_matrix((0, n)) + else: + empty_jac = np.empty((0, n)) + + return cls(n_eq, n_ineq, fun, jac, hess, keep_feasible) + + @classmethod + def _less_to_canonical(cls, cfun, ub, keep_feasible): + empty_fun = np.empty(0) + n = cfun.n + if cfun.sparse_jacobian: + empty_jac = sps.csr_matrix((0, n)) + else: + empty_jac = np.empty((0, n)) + + finite_ub = ub < np.inf + n_eq = 0 + n_ineq = np.sum(finite_ub) + + if np.all(finite_ub): + def fun(x): + return empty_fun, cfun.fun(x) - ub + + def jac(x): + return empty_jac, cfun.jac(x) + + def hess(x, v_eq, v_ineq): + return cfun.hess(x, v_ineq) + else: + finite_ub = np.nonzero(finite_ub)[0] + keep_feasible = keep_feasible[finite_ub] + ub = ub[finite_ub] + + def fun(x): + return empty_fun, cfun.fun(x)[finite_ub] - ub + + def jac(x): + return empty_jac, cfun.jac(x)[finite_ub] + + def hess(x, v_eq, v_ineq): + v = np.zeros(cfun.m) + v[finite_ub] = v_ineq + return cfun.hess(x, v) + + return cls(n_eq, n_ineq, fun, jac, hess, keep_feasible) + + @classmethod + def _greater_to_canonical(cls, cfun, lb, keep_feasible): + empty_fun = np.empty(0) + n = cfun.n + if cfun.sparse_jacobian: + empty_jac = sps.csr_matrix((0, n)) + 
+        else:
+            empty_jac = np.empty((0, n))
+
+        finite_lb = lb > -np.inf
+        n_eq = 0
+        n_ineq = np.sum(finite_lb)
+
+        if np.all(finite_lb):
+            def fun(x):
+                return empty_fun, lb - cfun.fun(x)
+
+            def jac(x):
+                return empty_jac, -cfun.jac(x)
+
+            def hess(x, v_eq, v_ineq):
+                return cfun.hess(x, -v_ineq)
+        else:
+            finite_lb = np.nonzero(finite_lb)[0]
+            keep_feasible = keep_feasible[finite_lb]
+            lb = lb[finite_lb]
+
+            def fun(x):
+                return empty_fun, lb - cfun.fun(x)[finite_lb]
+
+            def jac(x):
+                return empty_jac, -cfun.jac(x)[finite_lb]
+
+            def hess(x, v_eq, v_ineq):
+                v = np.zeros(cfun.m)
+                v[finite_lb] = -v_ineq
+                return cfun.hess(x, v)
+
+        return cls(n_eq, n_ineq, fun, jac, hess, keep_feasible)
+
+    @classmethod
+    def _interval_to_canonical(cls, cfun, lb, ub, keep_feasible):
+        lb_inf = lb == -np.inf
+        ub_inf = ub == np.inf
+        equal = lb == ub
+        less = lb_inf & ~ub_inf
+        greater = ub_inf & ~lb_inf
+        interval = ~equal & ~lb_inf & ~ub_inf
+
+        equal = np.nonzero(equal)[0]
+        less = np.nonzero(less)[0]
+        greater = np.nonzero(greater)[0]
+        interval = np.nonzero(interval)[0]
+        n_less = less.shape[0]
+        n_greater = greater.shape[0]
+        n_interval = interval.shape[0]
+        n_ineq = n_less + n_greater + 2 * n_interval
+        n_eq = equal.shape[0]
+
+        keep_feasible = np.hstack((keep_feasible[less],
+                                   keep_feasible[greater],
+                                   keep_feasible[interval],
+                                   keep_feasible[interval]))
+
+        def fun(x):
+            f = cfun.fun(x)
+            eq = f[equal] - lb[equal]
+            le = f[less] - ub[less]
+            ge = lb[greater] - f[greater]
+            il = f[interval] - ub[interval]
+            ig = lb[interval] - f[interval]
+            return eq, np.hstack((le, ge, il, ig))
+
+        def jac(x):
+            J = cfun.jac(x)
+            eq = J[equal]
+            le = J[less]
+            ge = -J[greater]
+            il = J[interval]
+            ig = -il
+            if sps.issparse(J):
+                ineq = sps.vstack((le, ge, il, ig))
+            else:
+                ineq = np.vstack((le, ge, il, ig))
+            return eq, ineq
+
+        def hess(x, v_eq, v_ineq):
+            n_start = 0
+            v_l = v_ineq[n_start:n_start + n_less]
+            n_start += n_less
+            v_g = v_ineq[n_start:n_start + n_greater]
+            n_start += n_greater
+            v_il = v_ineq[n_start:n_start + n_interval]
+            n_start += n_interval
+            v_ig = v_ineq[n_start:n_start + n_interval]
+
+            v = np.zeros_like(lb)
+            v[equal] = v_eq
+            v[less] = v_l
+            v[greater] = -v_g
+            v[interval] = v_il - v_ig
+
+            return cfun.hess(x, v)
+
+        return cls(n_eq, n_ineq, fun, jac, hess, keep_feasible)
+
+
+def initial_constraints_as_canonical(n, prepared_constraints, sparse_jacobian):
+    """Convert initial values of the constraints to the canonical format.
+
+    The purpose is to avoid one additional call to the constraints at the
+    initial point. It takes the values saved in `PreparedConstraint`,
+    modifies and concatenates them to the canonical constraint format.
+ """ + c_eq = [] + c_ineq = [] + J_eq = [] + J_ineq = [] + + for c in prepared_constraints: + f = c.fun.f + J = c.fun.J + lb, ub = c.bounds + if np.all(lb == ub): + c_eq.append(f - lb) + J_eq.append(J) + elif np.all(lb == -np.inf): + finite_ub = ub < np.inf + c_ineq.append(f[finite_ub] - ub[finite_ub]) + J_ineq.append(J[finite_ub]) + elif np.all(ub == np.inf): + finite_lb = lb > -np.inf + c_ineq.append(lb[finite_lb] - f[finite_lb]) + J_ineq.append(-J[finite_lb]) + else: + lb_inf = lb == -np.inf + ub_inf = ub == np.inf + equal = lb == ub + less = lb_inf & ~ub_inf + greater = ub_inf & ~lb_inf + interval = ~equal & ~lb_inf & ~ub_inf + + c_eq.append(f[equal] - lb[equal]) + c_ineq.append(f[less] - ub[less]) + c_ineq.append(lb[greater] - f[greater]) + c_ineq.append(f[interval] - ub[interval]) + c_ineq.append(lb[interval] - f[interval]) + + J_eq.append(J[equal]) + J_ineq.append(J[less]) + J_ineq.append(-J[greater]) + J_ineq.append(J[interval]) + J_ineq.append(-J[interval]) + + c_eq = np.hstack(c_eq) if c_eq else np.empty(0) + c_ineq = np.hstack(c_ineq) if c_ineq else np.empty(0) + + if sparse_jacobian: + vstack = sps.vstack + empty = sps.csr_matrix((0, n)) + else: + vstack = np.vstack + empty = np.empty((0, n)) + + J_eq = vstack(J_eq) if J_eq else empty + J_ineq = vstack(J_ineq) if J_ineq else empty + + return c_eq, c_ineq, J_eq, J_ineq diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/equality_constrained_sqp.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/equality_constrained_sqp.py new file mode 100644 index 0000000000000000000000000000000000000000..fb4c05dcdd03fb990d3418220a398e249ef581ee --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/equality_constrained_sqp.py @@ -0,0 +1,231 @@ +"""Byrd-Omojokun Trust-Region SQP method.""" + +from scipy.sparse import eye as speye +from .projections import projections +from .qp_subproblem import modified_dogleg, projected_cg, box_intersections +import numpy as np +from numpy.linalg import norm + +__all__ = ['equality_constrained_sqp'] + + +def default_scaling(x): + n, = np.shape(x) + return speye(n) + + +def equality_constrained_sqp(fun_and_constr, grad_and_jac, lagr_hess, + x0, fun0, grad0, constr0, + jac0, stop_criteria, + state, + initial_penalty, + initial_trust_radius, + factorization_method, + trust_lb=None, + trust_ub=None, + scaling=default_scaling): + """Solve nonlinear equality-constrained problem using trust-region SQP. + + Solve optimization problem: + + minimize fun(x) + subject to: constr(x) = 0 + + using Byrd-Omojokun Trust-Region SQP method described in [1]_. Several + implementation details are based on [2]_ and [3]_, p. 549. + + References + ---------- + .. [1] Lalee, Marucha, Jorge Nocedal, and Todd Plantenga. "On the + implementation of an algorithm for large-scale equality + constrained optimization." SIAM Journal on + Optimization 8.3 (1998): 682-706. + .. [2] Byrd, Richard H., Mary E. Hribar, and Jorge Nocedal. + "An interior point algorithm for large-scale nonlinear + programming." SIAM Journal on Optimization 9.4 (1999): 877-900. + .. [3] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization" + Second Edition (2006). + """ + PENALTY_FACTOR = 0.3 # Rho from formula (3.51), reference [2]_, p.891. 
+ LARGE_REDUCTION_RATIO = 0.9 + INTERMEDIARY_REDUCTION_RATIO = 0.3 + SUFFICIENT_REDUCTION_RATIO = 1e-8 # Eta from reference [2]_, p.892. + TRUST_ENLARGEMENT_FACTOR_L = 7.0 + TRUST_ENLARGEMENT_FACTOR_S = 2.0 + MAX_TRUST_REDUCTION = 0.5 + MIN_TRUST_REDUCTION = 0.1 + SOC_THRESHOLD = 0.1 + TR_FACTOR = 0.8 # Zeta from formula (3.21), reference [2]_, p.885. + BOX_FACTOR = 0.5 + + n, = np.shape(x0) # Number of parameters + + # Set default lower and upper bounds. + if trust_lb is None: + trust_lb = np.full(n, -np.inf) + if trust_ub is None: + trust_ub = np.full(n, np.inf) + + # Initial values + x = np.copy(x0) + trust_radius = initial_trust_radius + penalty = initial_penalty + # Compute Values + f = fun0 + c = grad0 + b = constr0 + A = jac0 + S = scaling(x) + # Get projections + try: + Z, LS, Y = projections(A, factorization_method) + except ValueError as e: + if str(e) == "expected square matrix": + # can be the case if there are more equality + # constraints than independent variables + raise ValueError( + "The 'expected square matrix' error can occur if there are" + " more equality constraints than independent variables." + " Consider how your constraints are set up, or use" + " factorization_method='SVDFactorization'." + ) from e + else: + raise e + + # Compute least-square lagrange multipliers + v = -LS.dot(c) + # Compute Hessian + H = lagr_hess(x, v) + + # Update state parameters + optimality = norm(c + A.T.dot(v), np.inf) + constr_violation = norm(b, np.inf) if len(b) > 0 else 0 + cg_info = {'niter': 0, 'stop_cond': 0, + 'hits_boundary': False} + + last_iteration_failed = False + while not stop_criteria(state, x, last_iteration_failed, + optimality, constr_violation, + trust_radius, penalty, cg_info): + # Normal Step - `dn` + # minimize 1/2*||A dn + b||^2 + # subject to: + # ||dn|| <= TR_FACTOR * trust_radius + # BOX_FACTOR * lb <= dn <= BOX_FACTOR * ub. + dn = modified_dogleg(A, Y, b, + TR_FACTOR*trust_radius, + BOX_FACTOR*trust_lb, + BOX_FACTOR*trust_ub) + + # Tangential Step - `dt` + # Solve the QP problem: + # minimize 1/2 dt.T H dt + dt.T (H dn + c) + # subject to: + # A dt = 0 + # ||dt|| <= sqrt(trust_radius**2 - ||dn||**2) + # lb - dn <= dt <= ub - dn + c_t = H.dot(dn) + c + b_t = np.zeros_like(b) + trust_radius_t = np.sqrt(trust_radius**2 - np.linalg.norm(dn)**2) + lb_t = trust_lb - dn + ub_t = trust_ub - dn + dt, cg_info = projected_cg(H, c_t, Z, Y, b_t, + trust_radius_t, + lb_t, ub_t) + + # Compute update (normal + tangential steps). + d = dn + dt + + # Compute second order model: 1/2 d H d + c.T d + f. + quadratic_model = 1/2*(H.dot(d)).dot(d) + c.T.dot(d) + # Compute linearized constraint: l = A d + b. + linearized_constr = A.dot(d)+b + # Compute new penalty parameter according to formula (3.52), + # reference [2]_, p.891. + vpred = norm(b) - norm(linearized_constr) + # Guarantee `vpred` always positive, + # regardless of roundoff errors. + vpred = max(1e-16, vpred) + previous_penalty = penalty + if quadratic_model > 0: + new_penalty = quadratic_model / ((1-PENALTY_FACTOR)*vpred) + penalty = max(penalty, new_penalty) + # Compute predicted reduction according to formula (3.52), + # reference [2]_, p.891. 
+ predicted_reduction = -quadratic_model + penalty*vpred + + # Compute merit function at current point + merit_function = f + penalty*norm(b) + # Evaluate function and constraints at trial point + x_next = x + S.dot(d) + f_next, b_next = fun_and_constr(x_next) + # Compute merit function at trial point + merit_function_next = f_next + penalty*norm(b_next) + # Compute actual reduction according to formula (3.54), + # reference [2]_, p.892. + actual_reduction = merit_function - merit_function_next + # Compute reduction ratio + reduction_ratio = actual_reduction / predicted_reduction + + # Second order correction (SOC), reference [2]_, p.892. + if reduction_ratio < SUFFICIENT_REDUCTION_RATIO and \ + norm(dn) <= SOC_THRESHOLD * norm(dt): + # Compute second order correction + y = -Y.dot(b_next) + # Make sure increment is inside box constraints + _, t, intersect = box_intersections(d, y, trust_lb, trust_ub) + # Compute tentative point + x_soc = x + S.dot(d + t*y) + f_soc, b_soc = fun_and_constr(x_soc) + # Recompute actual reduction + merit_function_soc = f_soc + penalty*norm(b_soc) + actual_reduction_soc = merit_function - merit_function_soc + # Recompute reduction ratio + reduction_ratio_soc = actual_reduction_soc / predicted_reduction + if intersect and reduction_ratio_soc >= SUFFICIENT_REDUCTION_RATIO: + x_next = x_soc + f_next = f_soc + b_next = b_soc + reduction_ratio = reduction_ratio_soc + + # Readjust trust region step, formula (3.55), reference [2]_, p.892. + if reduction_ratio >= LARGE_REDUCTION_RATIO: + trust_radius = max(TRUST_ENLARGEMENT_FACTOR_L * norm(d), + trust_radius) + elif reduction_ratio >= INTERMEDIARY_REDUCTION_RATIO: + trust_radius = max(TRUST_ENLARGEMENT_FACTOR_S * norm(d), + trust_radius) + # Reduce trust region step, according to reference [3]_, p.696. 
+ elif reduction_ratio < SUFFICIENT_REDUCTION_RATIO: + trust_reduction = ((1-SUFFICIENT_REDUCTION_RATIO) / + (1-reduction_ratio)) + new_trust_radius = trust_reduction * norm(d) + if new_trust_radius >= MAX_TRUST_REDUCTION * trust_radius: + trust_radius *= MAX_TRUST_REDUCTION + elif new_trust_radius >= MIN_TRUST_REDUCTION * trust_radius: + trust_radius = new_trust_radius + else: + trust_radius *= MIN_TRUST_REDUCTION + + # Update iteration + if reduction_ratio >= SUFFICIENT_REDUCTION_RATIO: + x = x_next + f, b = f_next, b_next + c, A = grad_and_jac(x) + S = scaling(x) + # Get projections + Z, LS, Y = projections(A, factorization_method) + # Compute least-square lagrange multipliers + v = -LS.dot(c) + # Compute Hessian + H = lagr_hess(x, v) + # Set Flag + last_iteration_failed = False + # Optimality values + optimality = norm(c + A.T.dot(v), np.inf) + constr_violation = norm(b, np.inf) if len(b) > 0 else 0 + else: + penalty = previous_penalty + last_iteration_failed = True + + return x, state diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py new file mode 100644 index 0000000000000000000000000000000000000000..2835ea5445c0eafc303f0cb1ab8543f48b7e3bb9 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py @@ -0,0 +1,564 @@ +import time +import numpy as np +from scipy.sparse.linalg import LinearOperator +from .._differentiable_functions import VectorFunction +from .._constraints import ( + NonlinearConstraint, LinearConstraint, PreparedConstraint, Bounds, strict_bounds) +from .._hessian_update_strategy import BFGS +from .._optimize import OptimizeResult +from .._differentiable_functions import ScalarFunction +from .equality_constrained_sqp import equality_constrained_sqp +from .canonical_constraint import (CanonicalConstraint, + initial_constraints_as_canonical) +from .tr_interior_point import tr_interior_point +from .report import BasicReport, SQPReport, IPReport + + +TERMINATION_MESSAGES = { + 0: "The maximum number of function evaluations is exceeded.", + 1: "`gtol` termination condition is satisfied.", + 2: "`xtol` termination condition is satisfied.", + 3: "`callback` function requested termination." +} + + +class HessianLinearOperator: + """Build LinearOperator from hessp""" + def __init__(self, hessp, n): + self.hessp = hessp + self.n = n + + def __call__(self, x, *args): + def matvec(p): + return self.hessp(x, p, *args) + + return LinearOperator((self.n, self.n), matvec=matvec) + + +class LagrangianHessian: + """The Hessian of the Lagrangian as LinearOperator. + + The Lagrangian is computed as the objective function plus all the + constraints, each multiplied by its Lagrange multiplier.
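A brief sketch of the LinearOperator pattern that `HessianLinearOperator` and `LagrangianHessian` rely on above: the Hessian is exposed only through matrix-vector products, never formed explicitly (the quadratic objective here is a hypothetical stand-in).

import numpy as np
from scipy.sparse.linalg import LinearOperator

def hessp(x, p):
    # Hessian-vector product of f(x) = 0.5*||x||**2, whose Hessian is I.
    return p

n = 3
x = np.zeros(n)
H = LinearOperator((n, n), matvec=lambda p: hessp(x, p))
print(H.dot(np.arange(3.0)))   # [0. 1. 2.]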
+ """ + def __init__(self, n, objective_hess, constraints_hess): + self.n = n + self.objective_hess = objective_hess + self.constraints_hess = constraints_hess + + def __call__(self, x, v_eq=np.empty(0), v_ineq=np.empty(0)): + H_objective = self.objective_hess(x) + H_constraints = self.constraints_hess(x, v_eq, v_ineq) + + def matvec(p): + return H_objective.dot(p) + H_constraints.dot(p) + + return LinearOperator((self.n, self.n), matvec) + + +def update_state_sqp(state, x, last_iteration_failed, objective, prepared_constraints, + start_time, tr_radius, constr_penalty, cg_info): + state.nit += 1 + state.nfev = objective.nfev + state.njev = objective.ngev + state.nhev = objective.nhev + state.constr_nfev = [c.fun.nfev if isinstance(c.fun, VectorFunction) else 0 + for c in prepared_constraints] + state.constr_njev = [c.fun.njev if isinstance(c.fun, VectorFunction) else 0 + for c in prepared_constraints] + state.constr_nhev = [c.fun.nhev if isinstance(c.fun, VectorFunction) else 0 + for c in prepared_constraints] + + if not last_iteration_failed: + state.x = x + state.fun = objective.f + state.grad = objective.g + state.v = [c.fun.v for c in prepared_constraints] + state.constr = [c.fun.f for c in prepared_constraints] + state.jac = [c.fun.J for c in prepared_constraints] + # Compute Lagrangian Gradient + state.lagrangian_grad = np.copy(state.grad) + for c in prepared_constraints: + state.lagrangian_grad += c.fun.J.T.dot(c.fun.v) + state.optimality = np.linalg.norm(state.lagrangian_grad, np.inf) + # Compute maximum constraint violation + state.constr_violation = 0 + for i in range(len(prepared_constraints)): + lb, ub = prepared_constraints[i].bounds + c = state.constr[i] + state.constr_violation = np.max([state.constr_violation, + np.max(lb - c), + np.max(c - ub)]) + + state.execution_time = time.time() - start_time + state.tr_radius = tr_radius + state.constr_penalty = constr_penalty + state.cg_niter += cg_info["niter"] + state.cg_stop_cond = cg_info["stop_cond"] + + return state + + +def update_state_ip(state, x, last_iteration_failed, objective, + prepared_constraints, start_time, + tr_radius, constr_penalty, cg_info, + barrier_parameter, barrier_tolerance): + state = update_state_sqp(state, x, last_iteration_failed, objective, + prepared_constraints, start_time, tr_radius, + constr_penalty, cg_info) + state.barrier_parameter = barrier_parameter + state.barrier_tolerance = barrier_tolerance + return state + + +def _minimize_trustregion_constr(fun, x0, args, grad, + hess, hessp, bounds, constraints, + xtol=1e-8, gtol=1e-8, + barrier_tol=1e-8, + sparse_jacobian=None, + callback=None, maxiter=1000, + verbose=0, finite_diff_rel_step=None, + initial_constr_penalty=1.0, initial_tr_radius=1.0, + initial_barrier_parameter=0.1, + initial_barrier_tolerance=0.1, + factorization_method=None, + disp=False): + """Minimize a scalar function subject to constraints. + + Parameters + ---------- + gtol : float, optional + Tolerance for termination by the norm of the Lagrangian gradient. + The algorithm will terminate when both the infinity norm (i.e., max + abs value) of the Lagrangian gradient and the constraint violation + are smaller than ``gtol``. Default is 1e-8. + xtol : float, optional + Tolerance for termination by the change of the independent variable. + The algorithm will terminate when ``tr_radius < xtol``, where + ``tr_radius`` is the radius of the trust region used in the algorithm. + Default is 1e-8. 
+ barrier_tol : float, optional + Threshold on the barrier parameter for the algorithm termination. + When inequality constraints are present, the algorithm will terminate + only when the barrier parameter is less than `barrier_tol`. + Default is 1e-8. + sparse_jacobian : {bool, None}, optional + Determines how to represent Jacobians of the constraints. If bool, + then Jacobians of all the constraints will be converted to the + corresponding format. If None (default), then Jacobians won't be + converted, but the algorithm can proceed only if they all have the + same format. + initial_tr_radius : float, optional + Initial trust radius. The trust radius gives the maximum distance + between solution points in consecutive iterations. It reflects the + trust the algorithm puts in the local approximation of the optimization + problem. The trust region should be large where the local approximation + is accurate and small where it is valid only close to the current point. + The trust radius is automatically updated throughout the optimization + process, with ``initial_tr_radius`` being its initial value. + Default is 1 (recommended in [1]_, p. 19). + initial_constr_penalty : float, optional + Initial constraints penalty parameter. The penalty parameter is used for + balancing the requirements of decreasing the objective function + and satisfying the constraints. It is used for defining the merit function: + ``merit_function(x) = fun(x) + constr_penalty * constr_norm_l2(x)``, + where ``constr_norm_l2(x)`` is the l2 norm of a vector containing all + the constraints. The merit function is used for accepting or rejecting + trial points and ``constr_penalty`` weights the two conflicting goals + of reducing the objective function and satisfying the constraints. + The penalty is automatically updated throughout the optimization process, + with ``initial_constr_penalty`` being its initial value. Default is 1 + (recommended in [1]_, p. 19). + initial_barrier_parameter, initial_barrier_tolerance : float, optional + Initial barrier parameter and initial tolerance for the barrier subproblem. + Both are used only when inequality constraints are present. For dealing with + optimization problems ``min_x f(x)`` subject to inequality constraints + ``c(x) <= 0`` the algorithm introduces slack variables, solving the problem + ``min_(x,s) f(x) - barrier_parameter*sum(ln(s))`` subject to the equality + constraints ``c(x) + s = 0`` instead of the original problem. This subproblem + is solved for decreasing values of ``barrier_parameter`` and with decreasing + tolerances for the termination, starting with ``initial_barrier_parameter`` + for the barrier parameter and ``initial_barrier_tolerance`` for the + barrier tolerance. Default is 0.1 for both values (recommended in [1]_, p. 19). + Also note that ``barrier_parameter`` and ``barrier_tolerance`` are updated + with the same prefactor. + factorization_method : string or None, optional + Method to factorize the Jacobian of the constraints. Use None (default) + for the auto selection or one of: + + - 'NormalEquation' (requires scikit-sparse) + - 'AugmentedSystem' + - 'QRFactorization' + - 'SVDFactorization' + + The methods 'NormalEquation' and 'AugmentedSystem' can be used only + with sparse constraints. The projections required by the algorithm + will be computed using, respectively, the normal equation and the + augmented system approaches explained in [1]_.
'NormalEquation' + computes the Cholesky factorization of ``A A.T`` and 'AugmentedSystem' + performs the LU factorization of an augmented system. They usually + provide similar results. 'AugmentedSystem' is used by default for + sparse matrices. + + The methods 'QRFactorization' and 'SVDFactorization' can be used + only with dense constraints. They compute the required projections + using, respectively, QR and SVD factorizations. The 'SVDFactorization' + method can cope with Jacobian matrices with deficient row rank and will + be used whenever other factorization methods fail (which may imply the + conversion of sparse matrices to a dense format when required). + By default, 'QRFactorization' is used for dense matrices. + finite_diff_rel_step : None or array_like, optional + Relative step size for the finite difference approximation. + maxiter : int, optional + Maximum number of algorithm iterations. Default is 1000. + verbose : {0, 1, 2}, optional + Level of algorithm's verbosity: + + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations. + * 3 : display progress during iterations (more complete report). + + disp : bool, optional + If True (default), then `verbose` will be set to 1 if it was 0. + + Returns + ------- + `OptimizeResult` with the fields documented below. Note the following: + + 1. All values corresponding to the constraints are ordered as they + were passed to the solver. And values corresponding to `bounds` + constraints are put *after* other constraints. + 2. All numbers of function, Jacobian or Hessian evaluations correspond + to numbers of actual Python function calls. It means, for example, + that if a Jacobian is estimated by finite differences, then the + number of Jacobian evaluations will be zero and the number of + function evaluations will be incremented by all calls during the + finite difference estimation. + + x : ndarray, shape (n,) + Solution found. + optimality : float + Infinity norm of the Lagrangian gradient at the solution. + constr_violation : float + Maximum constraint violation at the solution. + fun : float + Objective function at the solution. + grad : ndarray, shape (n,) + Gradient of the objective function at the solution. + lagrangian_grad : ndarray, shape (n,) + Gradient of the Lagrangian function at the solution. + nit : int + Total number of iterations. + nfev : integer + Number of the objective function evaluations. + njev : integer + Number of the objective function gradient evaluations. + nhev : integer + Number of the objective function Hessian evaluations. + cg_niter : int + Total number of the conjugate gradient method iterations. + method : {'equality_constrained_sqp', 'tr_interior_point'} + Optimization method used. + constr : list of ndarray + List of constraint values at the solution. + jac : list of {ndarray, sparse matrix} + List of the Jacobian matrices of the constraints at the solution. + v : list of ndarray + List of the Lagrange multipliers for the constraints at the solution. + For an inequality constraint a positive multiplier means that the upper + bound is active, a negative multiplier means that the lower bound is + active and if a multiplier is zero it means the constraint is not + active. + constr_nfev : list of int + Number of constraint evaluations for each of the constraints. + constr_njev : list of int + Number of Jacobian matrix evaluations for each of the constraints. + constr_nhev : list of int + Number of Hessian evaluations for each of the constraints. 
+ tr_radius : float + Radius of the trust region at the last iteration. + constr_penalty : float + Penalty parameter at the last iteration, see `initial_constr_penalty`. + barrier_tolerance : float + Tolerance for the barrier subproblem at the last iteration. + Only for problems with inequality constraints. + barrier_parameter : float + Barrier parameter at the last iteration. Only for problems + with inequality constraints. + execution_time : float + Total execution time. + message : str + Termination message. + status : {0, 1, 2, 3} + Termination status: + + * 0 : The maximum number of function evaluations is exceeded. + * 1 : `gtol` termination condition is satisfied. + * 2 : `xtol` termination condition is satisfied. + * 3 : `callback` function requested termination. + + cg_stop_cond : int + Reason for CG subproblem termination at the last iteration: + + * 0 : CG subproblem not evaluated. + * 1 : Iteration limit was reached. + * 2 : Reached the trust-region boundary. + * 3 : Negative curvature detected. + * 4 : Tolerance was satisfied. + + References + ---------- + .. [1] Conn, A. R., Gould, N. I., & Toint, P. L. + Trust region methods. 2000. SIAM. p. 19. + """ + x0 = np.atleast_1d(x0).astype(float) + n_vars = np.size(x0) + if hess is None: + if callable(hessp): + hess = HessianLinearOperator(hessp, n_vars) + else: + hess = BFGS() + if disp and verbose == 0: + verbose = 1 + + if bounds is not None: + modified_lb = np.nextafter(bounds.lb, -np.inf, where=bounds.lb > -np.inf) + modified_ub = np.nextafter(bounds.ub, np.inf, where=bounds.ub < np.inf) + modified_lb = np.where(np.isfinite(bounds.lb), modified_lb, bounds.lb) + modified_ub = np.where(np.isfinite(bounds.ub), modified_ub, bounds.ub) + bounds = Bounds(modified_lb, modified_ub, keep_feasible=bounds.keep_feasible) + finite_diff_bounds = strict_bounds(bounds.lb, bounds.ub, + bounds.keep_feasible, n_vars) + else: + finite_diff_bounds = (-np.inf, np.inf) + + # Define Objective Function + objective = ScalarFunction(fun, x0, args, grad, hess, + finite_diff_rel_step, finite_diff_bounds) + + # Put constraints in list format when needed. + if isinstance(constraints, (NonlinearConstraint, LinearConstraint)): + constraints = [constraints] + + # Prepare constraints. + prepared_constraints = [ + PreparedConstraint(c, x0, sparse_jacobian, finite_diff_bounds) + for c in constraints] + + # Check that all constraints are either sparse or dense. + n_sparse = sum(c.fun.sparse_jacobian for c in prepared_constraints) + if 0 < n_sparse < len(prepared_constraints): + raise ValueError("All constraints must have the same kind of " + "Jacobian --- either all sparse or all dense. " + "You can set the sparsity globally by setting " + "`sparse_jacobian` to either True or False.") + if prepared_constraints: + sparse_jacobian = n_sparse > 0 + + if bounds is not None: + if sparse_jacobian is None: + sparse_jacobian = True + prepared_constraints.append(PreparedConstraint(bounds, x0, + sparse_jacobian)) + + # Concatenate initial constraints to the canonical form. + c_eq0, c_ineq0, J_eq0, J_ineq0 = initial_constraints_as_canonical( + n_vars, prepared_constraints, sparse_jacobian) + + # Prepare all canonical constraints and concatenate them into one.
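For orientation, this function is normally reached through the public `scipy.optimize.minimize` interface; a minimal usage sketch with one (hypothetical) inequality constraint, which makes the dispatch below choose 'tr_interior_point':

import numpy as np
from scipy.optimize import minimize, NonlinearConstraint

# Minimize (x0 - 2)**2 + (x1 - 1)**2 subject to x0**2 + x1**2 <= 1.
cons = NonlinearConstraint(lambda x: x[0]**2 + x[1]**2, -np.inf, 1.0)
res = minimize(lambda x: (x[0] - 2)**2 + (x[1] - 1)**2, x0=[0.0, 0.0],
               method='trust-constr', constraints=cons)
print(res.x)   # approximately [0.894, 0.447], on the constraint boundary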
+ canonical_all = [CanonicalConstraint.from_PreparedConstraint(c) + for c in prepared_constraints] + + if len(canonical_all) == 0: + canonical = CanonicalConstraint.empty(n_vars) + elif len(canonical_all) == 1: + canonical = canonical_all[0] + else: + canonical = CanonicalConstraint.concatenate(canonical_all, + sparse_jacobian) + + # Generate the Hessian of the Lagrangian. + lagrangian_hess = LagrangianHessian(n_vars, objective.hess, canonical.hess) + + # Choose appropriate method + if canonical.n_ineq == 0: + method = 'equality_constrained_sqp' + else: + method = 'tr_interior_point' + + # Construct OptimizeResult + state = OptimizeResult( + nit=0, nfev=0, njev=0, nhev=0, + cg_niter=0, cg_stop_cond=0, + fun=objective.f, grad=objective.g, + lagrangian_grad=np.copy(objective.g), + constr=[c.fun.f for c in prepared_constraints], + jac=[c.fun.J for c in prepared_constraints], + constr_nfev=[0 for c in prepared_constraints], + constr_njev=[0 for c in prepared_constraints], + constr_nhev=[0 for c in prepared_constraints], + v=[c.fun.v for c in prepared_constraints], + method=method) + + # Start counting + start_time = time.time() + + # Define stop criteria + if method == 'equality_constrained_sqp': + def stop_criteria(state, x, last_iteration_failed, + optimality, constr_violation, + tr_radius, constr_penalty, cg_info): + state = update_state_sqp(state, x, last_iteration_failed, + objective, prepared_constraints, + start_time, tr_radius, constr_penalty, + cg_info) + if verbose == 2: + BasicReport.print_iteration(state.nit, + state.nfev, + state.cg_niter, + state.fun, + state.tr_radius, + state.optimality, + state.constr_violation) + elif verbose > 2: + SQPReport.print_iteration(state.nit, + state.nfev, + state.cg_niter, + state.fun, + state.tr_radius, + state.optimality, + state.constr_violation, + state.constr_penalty, + state.cg_stop_cond) + state.status = None + state.niter = state.nit # Alias for callback (backward-compatibility) + if callback is not None: + callback_stop = False + try: + callback_stop = callback(state) + except StopIteration: + callback_stop = True + if callback_stop: + state.status = 3 + return True + if state.optimality < gtol and state.constr_violation < gtol: + state.status = 1 + elif state.tr_radius < xtol: + state.status = 2 + elif state.nit >= maxiter: + state.status = 0 + return state.status in (0, 1, 2, 3) + elif method == 'tr_interior_point': + def stop_criteria(state, x, last_iteration_failed, tr_radius, + constr_penalty, cg_info, barrier_parameter, + barrier_tolerance): + state = update_state_ip(state, x, last_iteration_failed, + objective, prepared_constraints, + start_time, tr_radius, constr_penalty, + cg_info, barrier_parameter, barrier_tolerance) + if verbose == 2: + BasicReport.print_iteration(state.nit, + state.nfev, + state.cg_niter, + state.fun, + state.tr_radius, + state.optimality, + state.constr_violation) + elif verbose > 2: + IPReport.print_iteration(state.nit, + state.nfev, + state.cg_niter, + state.fun, + state.tr_radius, + state.optimality, + state.constr_violation, + state.constr_penalty, + state.barrier_parameter, + state.cg_stop_cond) + state.status = None + state.niter = state.nit # Alias for callback (backward compatibility) + if callback is not None: + callback_stop = False + try: + callback_stop = callback(state) + except StopIteration: + callback_stop = True + if callback_stop: + state.status = 3 + return True + if state.optimality < gtol and state.constr_violation < gtol: + state.status = 1 + elif (state.tr_radius < xtol + and 
state.barrier_parameter < barrier_tol): + state.status = 2 + elif state.nit >= maxiter: + state.status = 0 + return state.status in (0, 1, 2, 3) + + if verbose == 2: + BasicReport.print_header() + elif verbose > 2: + if method == 'equality_constrained_sqp': + SQPReport.print_header() + elif method == 'tr_interior_point': + IPReport.print_header() + + # Call inferior function to do the optimization + if method == 'equality_constrained_sqp': + def fun_and_constr(x): + f = objective.fun(x) + c_eq, _ = canonical.fun(x) + return f, c_eq + + def grad_and_jac(x): + g = objective.grad(x) + J_eq, _ = canonical.jac(x) + return g, J_eq + + _, result = equality_constrained_sqp( + fun_and_constr, grad_and_jac, lagrangian_hess, + x0, objective.f, objective.g, + c_eq0, J_eq0, + stop_criteria, state, + initial_constr_penalty, initial_tr_radius, + factorization_method) + + elif method == 'tr_interior_point': + _, result = tr_interior_point( + objective.fun, objective.grad, lagrangian_hess, + n_vars, canonical.n_ineq, canonical.n_eq, + canonical.fun, canonical.jac, + x0, objective.f, objective.g, + c_ineq0, J_ineq0, c_eq0, J_eq0, + stop_criteria, + canonical.keep_feasible, + xtol, state, initial_barrier_parameter, + initial_barrier_tolerance, + initial_constr_penalty, initial_tr_radius, + factorization_method) + + # Status 3 occurs when the callback function requests termination, + # this is assumed to not be a success. + result.success = True if result.status in (1, 2) else False + result.message = TERMINATION_MESSAGES[result.status] + + # Alias (for backward compatibility with 1.1.0) + result.niter = result.nit + + if verbose == 2: + BasicReport.print_footer() + elif verbose > 2: + if method == 'equality_constrained_sqp': + SQPReport.print_footer() + elif method == 'tr_interior_point': + IPReport.print_footer() + if verbose >= 1: + print(result.message) + print("Number of iterations: {}, function evaluations: {}, " + "CG iterations: {}, optimality: {:.2e}, " + "constraint violation: {:.2e}, execution time: {:4.2} s." + .format(result.nit, result.nfev, result.cg_niter, + result.optimality, result.constr_violation, + result.execution_time)) + return result diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/projections.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/projections.py new file mode 100644 index 0000000000000000000000000000000000000000..a07b836bdbad688a265ae34ce91a361fd5050eb1 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/projections.py @@ -0,0 +1,407 @@ +"""Basic linear factorizations needed by the solver.""" + +from scipy.sparse import (bmat, csc_matrix, eye, issparse) +from scipy.sparse.linalg import LinearOperator +import scipy.linalg +import scipy.sparse.linalg +try: + from sksparse.cholmod import cholesky_AAt + sksparse_available = True +except ImportError: + import warnings + sksparse_available = False +import numpy as np +from warnings import warn + +__all__ = [ + 'orthogonality', + 'projections', +] + + +def orthogonality(A, g): + """Measure orthogonality between a vector and the null space of a matrix. + + Compute a measure of orthogonality between the null space + of the (possibly sparse) matrix ``A`` and a given vector ``g``. + + The formula is a simplified (and cheaper) version of formula (3.13) + from [1]_. 
+ ``orth = norm(A g, ord=2)/(norm(A, ord='fro')*norm(g, ord=2))``. + + References + ---------- + .. [1] Gould, Nicholas IM, Mary E. Hribar, and Jorge Nocedal. + "On the solution of equality constrained quadratic + programming problems arising in optimization." + SIAM Journal on Scientific Computing 23.4 (2001): 1376-1395. + """ + # Compute vector norms + norm_g = np.linalg.norm(g) + # Compute Frobenius norm of the matrix A + if issparse(A): + norm_A = scipy.sparse.linalg.norm(A, ord='fro') + else: + norm_A = np.linalg.norm(A, ord='fro') + + # Check if norms are zero + if norm_g == 0 or norm_A == 0: + return 0 + + norm_A_g = np.linalg.norm(A.dot(g)) + # Orthogonality measure + orth = norm_A_g / (norm_A*norm_g) + return orth + + +def normal_equation_projections(A, m, n, orth_tol, max_refin, tol): + """Return linear operators for matrix A using ``NormalEquation`` approach. + """ + # Cholesky factorization + factor = cholesky_AAt(A) + + # z = x - A.T inv(A A.T) A x + def null_space(x): + v = factor(A.dot(x)) + z = x - A.T.dot(v) + + # Iterative refinement to improve roundoff + # errors described in [2]_, algorithm 5.1. + k = 0 + while orthogonality(A, z) > orth_tol: + if k >= max_refin: + break + # z_next = z - A.T inv(A A.T) A z + v = factor(A.dot(z)) + z = z - A.T.dot(v) + k += 1 + + return z + + # z = inv(A A.T) A x + def least_squares(x): + return factor(A.dot(x)) + + # z = A.T inv(A A.T) x + def row_space(x): + return A.T.dot(factor(x)) + + return null_space, least_squares, row_space + + +def augmented_system_projections(A, m, n, orth_tol, max_refin, tol): + """Return linear operators for matrix A using ``AugmentedSystem`` approach.""" + # Form augmented system + K = csc_matrix(bmat([[eye(n), A.T], [A, None]])) + # LU factorization + # TODO: Use a symmetric indefinite factorization + # to solve the system twice as fast (because + # of the symmetry). + try: + solve = scipy.sparse.linalg.factorized(K) + except RuntimeError: + warn("Singular Jacobian matrix. Using dense SVD decomposition to " + "perform the factorizations.", + stacklevel=3) + return svd_factorization_projections(A.toarray(), + m, n, orth_tol, + max_refin, tol) + + # z = x - A.T inv(A A.T) A x + # is computed solving the extended system: + # [I A.T] * [ z ] = [x] + # [A O ] [aux] [0] + def null_space(x): + # v = [x] + # [0] + v = np.hstack([x, np.zeros(m)]) + # lu_sol = [ z ] + # [aux] + lu_sol = solve(v) + z = lu_sol[:n] + + # Iterative refinement to improve roundoff + # errors described in [2]_, algorithm 5.2.
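The identity behind both routines can be checked densely; a short sketch with a random (hypothetical) full-row-rank matrix, using plain NumPy in place of the factorizations above:

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((2, 5))
x = rng.standard_normal(5)

# Dense analogue of the null-space operator: z = x - A.T inv(A A.T) A x.
v = np.linalg.solve(A @ A.T, A @ x)
z = x - A.T @ v
print(np.linalg.norm(A @ z))   # ~1e-16: z lies in the null space of A

# The orthogonality measure that drives the refinement loop.
orth = np.linalg.norm(A @ z) / (np.linalg.norm(A, 'fro') * np.linalg.norm(z))
print(orth)                    # tiny, so no refinement would be triggered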
+ k = 0 + while orthogonality(A, z) > orth_tol: + if k >= max_refin: + break + # new_v = [x] - [I A.T] * [ z ] + # [0] [A O ] [aux] + new_v = v - K.dot(lu_sol) + # [I A.T] * [delta z ] = new_v + # [A O ] [delta aux] + lu_update = solve(new_v) + # [ z ] += [delta z ] + # [aux] [delta aux] + lu_sol += lu_update + z = lu_sol[:n] + k += 1 + + # return z = x - A.T inv(A A.T) A x + return z + + # z = inv(A A.T) A x + # is computed solving the extended system: + # [I A.T] * [aux] = [x] + # [A O ] [ z ] [0] + def least_squares(x): + # v = [x] + # [0] + v = np.hstack([x, np.zeros(m)]) + # lu_sol = [aux] + # [ z ] + lu_sol = solve(v) + # return z = inv(A A.T) A x + return lu_sol[n:m+n] + + # z = A.T inv(A A.T) x + # is computed solving the extended system: + # [I A.T] * [ z ] = [0] + # [A O ] [aux] [x] + def row_space(x): + # v = [0] + # [x] + v = np.hstack([np.zeros(n), x]) + # lu_sol = [ z ] + # [aux] + lu_sol = solve(v) + # return z = A.T inv(A A.T) x + return lu_sol[:n] + + return null_space, least_squares, row_space + + +def qr_factorization_projections(A, m, n, orth_tol, max_refin, tol): + """Return linear operators for matrix A using ``QRFactorization`` approach. + """ + # QRFactorization + Q, R, P = scipy.linalg.qr(A.T, pivoting=True, mode='economic') + + if np.linalg.norm(R[-1, :], np.inf) < tol: + warn('Singular Jacobian matrix. Using SVD decomposition to ' + + 'perform the factorizations.', + stacklevel=3) + return svd_factorization_projections(A, m, n, + orth_tol, + max_refin, + tol) + + # z = x - A.T inv(A A.T) A x + def null_space(x): + # v = P inv(R) Q.T x + aux1 = Q.T.dot(x) + aux2 = scipy.linalg.solve_triangular(R, aux1, lower=False) + v = np.zeros(m) + v[P] = aux2 + z = x - A.T.dot(v) + + # Iterative refinement to improve roundoff + # errors described in [2]_, algorithm 5.1. + k = 0 + while orthogonality(A, z) > orth_tol: + if k >= max_refin: + break + # v = P inv(R) Q.T x + aux1 = Q.T.dot(z) + aux2 = scipy.linalg.solve_triangular(R, aux1, lower=False) + v[P] = aux2 + # z_next = z - A.T v + z = z - A.T.dot(v) + k += 1 + + return z + + # z = inv(A A.T) A x + def least_squares(x): + # z = P inv(R) Q.T x + aux1 = Q.T.dot(x) + aux2 = scipy.linalg.solve_triangular(R, aux1, lower=False) + z = np.zeros(m) + z[P] = aux2 + return z + + # z = A.T inv(A A.T) x + def row_space(x): + # z = Q inv(R.T) P.T x + aux1 = x[P] + aux2 = scipy.linalg.solve_triangular(R, aux1, + lower=False, + trans='T') + z = Q.dot(aux2) + return z + + return null_space, least_squares, row_space + + +def svd_factorization_projections(A, m, n, orth_tol, max_refin, tol): + """Return linear operators for matrix A using ``SVDFactorization`` approach. + """ + # SVD Factorization + U, s, Vt = scipy.linalg.svd(A, full_matrices=False) + + # Remove dimensions related with very small singular values + U = U[:, s > tol] + Vt = Vt[s > tol, :] + s = s[s > tol] + + # z = x - A.T inv(A A.T) A x + def null_space(x): + # v = U 1/s V.T x = inv(A A.T) A x + aux1 = Vt.dot(x) + aux2 = 1/s*aux1 + v = U.dot(aux2) + z = x - A.T.dot(v) + + # Iterative refinement to improve roundoff + # errors described in [2]_, algorithm 5.1. 
+ k = 0 + while orthogonality(A, z) > orth_tol: + if k >= max_refin: + break + # v = U 1/s V.T x = inv(A A.T) A x + aux1 = Vt.dot(z) + aux2 = 1/s*aux1 + v = U.dot(aux2) + # z_next = z - A.T v + z = z - A.T.dot(v) + k += 1 + + return z + + # z = inv(A A.T) A x + def least_squares(x): + # z = U 1/s V.T x = inv(A A.T) A x + aux1 = Vt.dot(x) + aux2 = 1/s*aux1 + z = U.dot(aux2) + return z + + # z = A.T inv(A A.T) x + def row_space(x): + # z = V 1/s U.T x + aux1 = U.T.dot(x) + aux2 = 1/s*aux1 + z = Vt.T.dot(aux2) + return z + + return null_space, least_squares, row_space + + +def projections(A, method=None, orth_tol=1e-12, max_refin=3, tol=1e-15): + """Return three linear operators related with a given matrix A. + + Parameters + ---------- + A : sparse matrix (or ndarray), shape (m, n) + Matrix ``A`` used in the projection. + method : string, optional + Method used for compute the given linear + operators. Should be one of: + + - 'NormalEquation': The operators + will be computed using the + so-called normal equation approach + explained in [1]_. In order to do + so the Cholesky factorization of + ``(A A.T)`` is computed. Exclusive + for sparse matrices. + - 'AugmentedSystem': The operators + will be computed using the + so-called augmented system approach + explained in [1]_. Exclusive + for sparse matrices. + - 'QRFactorization': Compute projections + using QR factorization. Exclusive for + dense matrices. + - 'SVDFactorization': Compute projections + using SVD factorization. Exclusive for + dense matrices. + + orth_tol : float, optional + Tolerance for iterative refinements. + max_refin : int, optional + Maximum number of iterative refinements. + tol : float, optional + Tolerance for singular values. + + Returns + ------- + Z : LinearOperator, shape (n, n) + Null-space operator. For a given vector ``x``, + the null space operator is equivalent to apply + a projection matrix ``P = I - A.T inv(A A.T) A`` + to the vector. It can be shown that this is + equivalent to project ``x`` into the null space + of A. + LS : LinearOperator, shape (m, n) + Least-squares operator. For a given vector ``x``, + the least-squares operator is equivalent to apply a + pseudoinverse matrix ``pinv(A.T) = inv(A A.T) A`` + to the vector. It can be shown that this vector + ``pinv(A.T) x`` is the least_square solution to + ``A.T y = x``. + Y : LinearOperator, shape (n, m) + Row-space operator. For a given vector ``x``, + the row-space operator is equivalent to apply a + projection matrix ``Q = A.T inv(A A.T)`` + to the vector. It can be shown that this + vector ``y = Q x`` the minimum norm solution + of ``A y = x``. + + Notes + ----- + Uses iterative refinements described in [1] + during the computation of ``Z`` in order to + cope with the possibility of large roundoff errors. + + References + ---------- + .. [1] Gould, Nicholas IM, Mary E. Hribar, and Jorge Nocedal. + "On the solution of equality constrained quadratic + programming problems arising in optimization." + SIAM Journal on Scientific Computing 23.4 (2001): 1376-1395. + """ + m, n = np.shape(A) + + # The factorization of an empty matrix + # only works for the sparse representation. 
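A usage sketch for this function (note the private module path, which may change between SciPy versions): the returned operators satisfy A (Z x) = 0 and A (Y b) = b.

import numpy as np
from scipy.optimize._trustregion_constr.projections import projections

A = np.array([[1.0, 0.0, 1.0],
              [0.0, 1.0, 1.0]])
Z, LS, Y = projections(A)   # dense input -> 'QRFactorization' by default
x = np.array([1.0, 2.0, 3.0])
b = np.array([1.0, 2.0])
print(np.allclose(A.dot(Z.dot(x)), 0.0))   # True: Z projects onto null(A)
print(np.allclose(A.dot(Y.dot(b)), b))     # True: Y yields the min-norm solution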
+ if m*n == 0: + A = csc_matrix(A) + + # Check Argument + if issparse(A): + if method is None: + method = "AugmentedSystem" + if method not in ("NormalEquation", "AugmentedSystem"): + raise ValueError("Method not allowed for sparse matrix.") + if method == "NormalEquation" and not sksparse_available: + warnings.warn("Only accepts 'NormalEquation' option when " + "scikit-sparse is available. Using " + "'AugmentedSystem' option instead.", + ImportWarning, stacklevel=3) + method = 'AugmentedSystem' + else: + if method is None: + method = "QRFactorization" + if method not in ("QRFactorization", "SVDFactorization"): + raise ValueError("Method not allowed for dense array.") + + if method == 'NormalEquation': + null_space, least_squares, row_space \ + = normal_equation_projections(A, m, n, orth_tol, max_refin, tol) + elif method == 'AugmentedSystem': + null_space, least_squares, row_space \ + = augmented_system_projections(A, m, n, orth_tol, max_refin, tol) + elif method == "QRFactorization": + null_space, least_squares, row_space \ + = qr_factorization_projections(A, m, n, orth_tol, max_refin, tol) + elif method == "SVDFactorization": + null_space, least_squares, row_space \ + = svd_factorization_projections(A, m, n, orth_tol, max_refin, tol) + + Z = LinearOperator((n, n), null_space) + LS = LinearOperator((m, n), least_squares) + Y = LinearOperator((n, m), row_space) + + return Z, LS, Y diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/qp_subproblem.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/qp_subproblem.py new file mode 100644 index 0000000000000000000000000000000000000000..a039a7738c283f90f30fd7c4583bf9e1a8f559d5 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/qp_subproblem.py @@ -0,0 +1,637 @@ +"""Equality-constrained quadratic programming solvers.""" + +from scipy.sparse import (linalg, bmat, csc_matrix) +from math import copysign +import numpy as np +from numpy.linalg import norm + +__all__ = [ + 'eqp_kktfact', + 'sphere_intersections', + 'box_intersections', + 'box_sphere_intersections', + 'inside_box_boundaries', + 'modified_dogleg', + 'projected_cg' +] + + +# For comparison with the projected CG +def eqp_kktfact(H, c, A, b): + """Solve equality-constrained quadratic programming (EQP) problem. + + Solve ``min 1/2 x.T H x + x.t c`` subject to ``A x + b = 0`` + using direct factorization of the KKT system. + + Parameters + ---------- + H : sparse matrix, shape (n, n) + Hessian matrix of the EQP problem. + c : array_like, shape (n,) + Gradient of the quadratic objective function. + A : sparse matrix + Jacobian matrix of the EQP problem. + b : array_like, shape (m,) + Right-hand side of the constraint equation. + + Returns + ------- + x : array_like, shape (n,) + Solution of the KKT problem. + lagrange_multipliers : ndarray, shape (m,) + Lagrange multipliers of the KKT problem. + """ + n, = np.shape(c) # Number of parameters + m, = np.shape(b) # Number of constraints + + # Karush-Kuhn-Tucker matrix of coefficients. + # Defined as in Nocedal/Wright "Numerical + # Optimization" p.452 in Eq. (16.4). + kkt_matrix = csc_matrix(bmat([[H, A.T], [A, None]])) + # Vector of coefficients. + kkt_vec = np.hstack([-c, -b]) + + # TODO: Use a symmetric indefinite factorization + # to solve the system twice as fast (because + # of the symmetry). 
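The same KKT solve can be checked densely on a tiny (hypothetical) EQP, minimize 1/2 x.T H x + x.T c subject to A x + b = 0, with NumPy standing in for the sparse LU below:

import numpy as np

H = np.eye(2)
c = np.array([-1.0, -1.0])
A = np.array([[1.0, 1.0]])
b = np.array([-1.0])               # constraint: x0 + x1 = 1

K = np.block([[H, A.T], [A, np.zeros((1, 1))]])
sol = np.linalg.solve(K, np.hstack([-c, -b]))
x, lagrange = sol[:2], -sol[2:]
print(x, lagrange)                 # [0.5 0.5] [-0.5]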
+ lu = linalg.splu(kkt_matrix) + kkt_sol = lu.solve(kkt_vec) + x = kkt_sol[:n] + lagrange_multipliers = -kkt_sol[n:n+m] + + return x, lagrange_multipliers + + +def sphere_intersections(z, d, trust_radius, + entire_line=False): + """Find the intersection between segment (or line) and spherical constraints. + + Find the intersection between the segment (or line) defined by the + parametric equation ``x(t) = z + t*d`` and the ball + ``||x|| <= trust_radius``. + + Parameters + ---------- + z : array_like, shape (n,) + Initial point. + d : array_like, shape (n,) + Direction. + trust_radius : float + Ball radius. + entire_line : bool, optional + When ``True``, the function returns the intersection between the line + ``x(t) = z + t*d`` (``t`` can assume any value) and the ball + ``||x|| <= trust_radius``. When ``False``, the function returns the intersection + between the segment ``x(t) = z + t*d``, ``0 <= t <= 1``, and the ball. + + Returns + ------- + ta, tb : float + The line/segment ``x(t) = z + t*d`` is inside the ball + for ``ta <= t <= tb``. + intersect : bool + When ``True``, there is an intersection between the line/segment + and the sphere. On the other hand, when ``False``, there is no + intersection. + """ + # Special case when d=0 + if norm(d) == 0: + return 0, 0, False + # Check for inf trust_radius + if np.isinf(trust_radius): + if entire_line: + ta = -np.inf + tb = np.inf + else: + ta = 0 + tb = 1 + intersect = True + return ta, tb, intersect + + a = np.dot(d, d) + b = 2 * np.dot(z, d) + c = np.dot(z, z) - trust_radius**2 + discriminant = b*b - 4*a*c + if discriminant < 0: + intersect = False + return 0, 0, intersect + sqrt_discriminant = np.sqrt(discriminant) + + # The following calculation is mathematically + # equivalent to: + # ta = (-b - sqrt_discriminant) / (2*a) + # tb = (-b + sqrt_discriminant) / (2*a) + # but produces smaller round-off errors. + # Look at Matrix Computations p.97 + # for a better justification. + aux = b + copysign(sqrt_discriminant, b) + ta = -aux / (2*a) + tb = -2*c / aux + ta, tb = sorted([ta, tb]) + + if entire_line: + intersect = True + else: + # Check to see if the intersection happens + # within the segment's length. + if tb < 0 or ta > 1: + intersect = False + ta = 0 + tb = 0 + else: + intersect = True + # Restrict intersection interval + # between 0 and 1. + ta = max(0, ta) + tb = min(1, tb) + + return ta, tb, intersect + + +def box_intersections(z, d, lb, ub, + entire_line=False): + """Find the intersection between segment (or line) and box constraints. + + Find the intersection between the segment (or line) defined by the + parametric equation ``x(t) = z + t*d`` and the rectangular box + ``lb <= x <= ub``. + + Parameters + ---------- + z : array_like, shape (n,) + Initial point. + d : array_like, shape (n,) + Direction. + lb : array_like, shape (n,) + Lower bounds to each one of the components of ``x``. Used + to delimit the rectangular box. + ub : array_like, shape (n, ) + Upper bounds to each one of the components of ``x``. Used + to delimit the rectangular box. + entire_line : bool, optional + When ``True``, the function returns the intersection between the line + ``x(t) = z + t*d`` (``t`` can assume any value) and the rectangular + box. When ``False``, the function returns the intersection between the segment + ``x(t) = z + t*d``, ``0 <= t <= 1``, and the rectangular box. + + Returns + ------- + ta, tb : float + The line/segment ``x(t) = z + t*d`` is inside the box + for ``ta <= t <= tb``.
+ intersect : bool + When ``True``, there is a intersection between the line (or segment) + and the rectangular box. On the other hand, when ``False``, there is no + intersection. + """ + # Make sure it is a numpy array + z = np.asarray(z) + d = np.asarray(d) + lb = np.asarray(lb) + ub = np.asarray(ub) + # Special case when d=0 + if norm(d) == 0: + return 0, 0, False + + # Get values for which d==0 + zero_d = (d == 0) + # If the boundaries are not satisfied for some coordinate + # for which "d" is zero, there is no box-line intersection. + if (z[zero_d] < lb[zero_d]).any() or (z[zero_d] > ub[zero_d]).any(): + intersect = False + return 0, 0, intersect + # Remove values for which d is zero + not_zero_d = np.logical_not(zero_d) + z = z[not_zero_d] + d = d[not_zero_d] + lb = lb[not_zero_d] + ub = ub[not_zero_d] + + # Find a series of intervals (t_lb[i], t_ub[i]). + t_lb = (lb-z) / d + t_ub = (ub-z) / d + # Get the intersection of all those intervals. + ta = max(np.minimum(t_lb, t_ub)) + tb = min(np.maximum(t_lb, t_ub)) + + # Check if intersection is feasible + if ta <= tb: + intersect = True + else: + intersect = False + # Checks to see if intersection happens within vectors length. + if not entire_line: + if tb < 0 or ta > 1: + intersect = False + ta = 0 + tb = 0 + else: + # Restrict intersection interval between 0 and 1. + ta = max(0, ta) + tb = min(1, tb) + + return ta, tb, intersect + + +def box_sphere_intersections(z, d, lb, ub, trust_radius, + entire_line=False, + extra_info=False): + """Find the intersection between segment (or line) and box/sphere constraints. + + Find the intersection between the segment (or line) defined by the + parametric equation ``x(t) = z + t*d``, the rectangular box + ``lb <= x <= ub`` and the ball ``||x|| <= trust_radius``. + + Parameters + ---------- + z : array_like, shape (n,) + Initial point. + d : array_like, shape (n,) + Direction. + lb : array_like, shape (n,) + Lower bounds to each one of the components of ``x``. Used + to delimit the rectangular box. + ub : array_like, shape (n, ) + Upper bounds to each one of the components of ``x``. Used + to delimit the rectangular box. + trust_radius : float + Ball radius. + entire_line : bool, optional + When ``True``, the function returns the intersection between the line + ``x(t) = z + t*d`` (``t`` can assume any value) and the constraints. + When ``False``, the function returns the intersection between the segment + ``x(t) = z + t*d``, ``0 <= t <= 1`` and the constraints. + extra_info : bool, optional + When ``True``, the function returns ``intersect_sphere`` and ``intersect_box``. + + Returns + ------- + ta, tb : float + The line/segment ``x(t) = z + t*d`` is inside the rectangular box and + inside the ball for ``ta <= t <= tb``. + intersect : bool + When ``True``, there is a intersection between the line (or segment) + and both constraints. On the other hand, when ``False``, there is no + intersection. + sphere_info : dict, optional + Dictionary ``{ta, tb, intersect}`` containing the interval ``[ta, tb]`` + for which the line intercepts the ball. And a boolean value indicating + whether the sphere is intersected by the line. + box_info : dict, optional + Dictionary ``{ta, tb, intersect}`` containing the interval ``[ta, tb]`` + for which the line intercepts the box. And a boolean value indicating + whether the box is intersected by the line. 
+ """ + ta_b, tb_b, intersect_b = box_intersections(z, d, lb, ub, + entire_line) + ta_s, tb_s, intersect_s = sphere_intersections(z, d, + trust_radius, + entire_line) + ta = np.maximum(ta_b, ta_s) + tb = np.minimum(tb_b, tb_s) + if intersect_b and intersect_s and ta <= tb: + intersect = True + else: + intersect = False + + if extra_info: + sphere_info = {'ta': ta_s, 'tb': tb_s, 'intersect': intersect_s} + box_info = {'ta': ta_b, 'tb': tb_b, 'intersect': intersect_b} + return ta, tb, intersect, sphere_info, box_info + else: + return ta, tb, intersect + + +def inside_box_boundaries(x, lb, ub): + """Check if lb <= x <= ub.""" + return (lb <= x).all() and (x <= ub).all() + + +def reinforce_box_boundaries(x, lb, ub): + """Return clipped value of x""" + return np.minimum(np.maximum(x, lb), ub) + + +def modified_dogleg(A, Y, b, trust_radius, lb, ub): + """Approximately minimize ``1/2*|| A x + b ||^2`` inside trust-region. + + Approximately solve the problem of minimizing ``1/2*|| A x + b ||^2`` + subject to ``||x|| < Delta`` and ``lb <= x <= ub`` using a modification + of the classical dogleg approach. + + Parameters + ---------- + A : LinearOperator (or sparse matrix or ndarray), shape (m, n) + Matrix ``A`` in the minimization problem. It should have + dimension ``(m, n)`` such that ``m < n``. + Y : LinearOperator (or sparse matrix or ndarray), shape (n, m) + LinearOperator that apply the projection matrix + ``Q = A.T inv(A A.T)`` to the vector. The obtained vector + ``y = Q x`` being the minimum norm solution of ``A y = x``. + b : array_like, shape (m,) + Vector ``b``in the minimization problem. + trust_radius: float + Trust radius to be considered. Delimits a sphere boundary + to the problem. + lb : array_like, shape (n,) + Lower bounds to each one of the components of ``x``. + It is expected that ``lb <= 0``, otherwise the algorithm + may fail. If ``lb[i] = -Inf``, the lower + bound for the ith component is just ignored. + ub : array_like, shape (n, ) + Upper bounds to each one of the components of ``x``. + It is expected that ``ub >= 0``, otherwise the algorithm + may fail. If ``ub[i] = Inf``, the upper bound for the ith + component is just ignored. + + Returns + ------- + x : array_like, shape (n,) + Solution to the problem. + + Notes + ----- + Based on implementations described in pp. 885-886 from [1]_. + + References + ---------- + .. [1] Byrd, Richard H., Mary E. Hribar, and Jorge Nocedal. + "An interior point algorithm for large-scale nonlinear + programming." SIAM Journal on Optimization 9.4 (1999): 877-900. + """ + # Compute minimum norm minimizer of 1/2*|| A x + b ||^2. + newton_point = -Y.dot(b) + # Check for interior point + if inside_box_boundaries(newton_point, lb, ub) \ + and norm(newton_point) <= trust_radius: + x = newton_point + return x + + # Compute gradient vector ``g = A.T b`` + g = A.T.dot(b) + # Compute Cauchy point + # `cauchy_point = g.T g / (g.T A.T A g)``. + A_g = A.dot(g) + cauchy_point = -np.dot(g, g) / np.dot(A_g, A_g) * g + # Origin + origin_point = np.zeros_like(cauchy_point) + + # Check the segment between cauchy_point and newton_point + # for a possible solution. + z = cauchy_point + p = newton_point - cauchy_point + _, alpha, intersect = box_sphere_intersections(z, p, lb, ub, + trust_radius) + if intersect: + x1 = z + alpha*p + else: + # Check the segment between the origin and cauchy_point + # for a possible solution. 
+ z = origin_point + p = cauchy_point + _, alpha, _ = box_sphere_intersections(z, p, lb, ub, + trust_radius) + x1 = z + alpha*p + + # Check the segment between origin and newton_point + # for a possible solution. + z = origin_point + p = newton_point + _, alpha, _ = box_sphere_intersections(z, p, lb, ub, + trust_radius) + x2 = z + alpha*p + + # Return the best solution among x1 and x2. + if norm(A.dot(x1) + b) < norm(A.dot(x2) + b): + return x1 + else: + return x2 + + +def projected_cg(H, c, Z, Y, b, trust_radius=np.inf, + lb=None, ub=None, tol=None, + max_iter=None, max_infeasible_iter=None, + return_all=False): + """Solve EQP problem with projected CG method. + + Solve equality-constrained quadratic programming problem + ``min 1/2 x.T H x + x.T c`` subject to ``A x + b = 0`` and, + possibly, to trust region constraints ``||x|| < trust_radius`` + and box constraints ``lb <= x <= ub``. + + Parameters + ---------- + H : LinearOperator (or sparse matrix or ndarray), shape (n, n) + Operator for computing ``H v``. + c : array_like, shape (n,) + Gradient of the quadratic objective function. + Z : LinearOperator (or sparse matrix or ndarray), shape (n, n) + Operator for projecting ``x`` into the null space of A. + Y : LinearOperator, sparse matrix, ndarray, shape (n, m) + Operator that, for a given vector ``b``, computes the smallest + norm solution of ``A x + b = 0``. + b : array_like, shape (m,) + Right-hand side of the constraint equation. + trust_radius : float, optional + Trust radius to be considered. By default, uses ``trust_radius=inf``, + which means no trust radius at all. + lb : array_like, shape (n,), optional + Lower bounds to each one of the components of ``x``. + If ``lb[i] = -Inf`` the lower bound for the i-th + component is just ignored (default). + ub : array_like, shape (n, ), optional + Upper bounds to each one of the components of ``x``. + If ``ub[i] = Inf`` the upper bound for the i-th + component is just ignored (default). + tol : float, optional + Tolerance used to interrupt the algorithm. + max_iter : int, optional + Maximum algorithm iterations, where ``max_iter <= n-m``. + By default, uses ``max_iter = n-m``. + max_infeasible_iter : int, optional + Maximum infeasible (regarding box constraints) iterations the + algorithm is allowed to take. + By default, uses ``max_infeasible_iter = n-m``. + return_all : bool, optional + When ``True``, return the list of all vectors through the iterations. + + Returns + ------- + x : array_like, shape (n,) + Solution of the EQP problem. + info : dict + Dictionary containing the following: + + - niter : Number of iterations. + - stop_cond : Reason for algorithm termination: + 1. Iteration limit was reached; + 2. Reached the trust-region boundary; + 3. Negative curvature detected; + 4. Tolerance was satisfied. + - allvecs : List containing all intermediary vectors (optional). + - hits_boundary : True if the proposed step is on the boundary + of the trust region. + + Notes + ----- + Implementation of Algorithm 6.2 of [1]_. + + In the absence of spherical and box constraints, given sufficient + iterations, the method returns a truly optimal result. + In the presence of those constraints, the value returned is only + an inexpensive approximation of the optimal value. + + References + ---------- + .. [1] Gould, Nicholas IM, Mary E. Hribar, and Jorge Nocedal. + "On the solution of equality constrained quadratic + programming problems arising in optimization." + SIAM Journal on Scientific Computing 23.4 (2001): 1376-1395.
+ """ + CLOSE_TO_ZERO = 1e-25 + + n, = np.shape(c) # Number of parameters + m, = np.shape(b) # Number of constraints + + # Initial Values + x = Y.dot(-b) + r = Z.dot(H.dot(x) + c) + g = Z.dot(r) + p = -g + + # Store ``x`` value + if return_all: + allvecs = [x] + # Values for the first iteration + H_p = H.dot(p) + rt_g = norm(g)**2 # g.T g = r.T Z g = r.T g (ref [1]_ p.1389) + + # If x > trust-region the problem does not have a solution. + tr_distance = trust_radius - norm(x) + if tr_distance < 0: + raise ValueError("Trust region problem does not have a solution.") + # If x == trust_radius, then x is the solution + # to the optimization problem, since x is the + # minimum norm solution to Ax=b. + elif tr_distance < CLOSE_TO_ZERO: + info = {'niter': 0, 'stop_cond': 2, 'hits_boundary': True} + if return_all: + allvecs.append(x) + info['allvecs'] = allvecs + return x, info + + # Set default tolerance + if tol is None: + tol = max(min(0.01 * np.sqrt(rt_g), 0.1 * rt_g), CLOSE_TO_ZERO) + # Set default lower and upper bounds + if lb is None: + lb = np.full(n, -np.inf) + if ub is None: + ub = np.full(n, np.inf) + # Set maximum iterations + if max_iter is None: + max_iter = n-m + max_iter = min(max_iter, n-m) + # Set maximum infeasible iterations + if max_infeasible_iter is None: + max_infeasible_iter = n-m + + hits_boundary = False + stop_cond = 1 + counter = 0 + last_feasible_x = np.zeros_like(x) + k = 0 + for i in range(max_iter): + # Stop criteria - Tolerance : r.T g < tol + if rt_g < tol: + stop_cond = 4 + break + k += 1 + # Compute curvature + pt_H_p = H_p.dot(p) + # Stop criteria - Negative curvature + if pt_H_p <= 0: + if np.isinf(trust_radius): + raise ValueError("Negative curvature not allowed " + "for unrestricted problems.") + else: + # Find intersection with constraints + _, alpha, intersect = box_sphere_intersections( + x, p, lb, ub, trust_radius, entire_line=True) + # Update solution + if intersect: + x = x + alpha*p + # Reinforce variables are inside box constraints. + # This is only necessary because of roundoff errors. + x = reinforce_box_boundaries(x, lb, ub) + # Attribute information + stop_cond = 3 + hits_boundary = True + break + + # Get next step + alpha = rt_g / pt_H_p + x_next = x + alpha*p + + # Stop criteria - Hits boundary + if np.linalg.norm(x_next) >= trust_radius: + # Find intersection with box constraints + _, theta, intersect = box_sphere_intersections(x, alpha*p, lb, ub, + trust_radius) + # Update solution + if intersect: + x = x + theta*alpha*p + # Reinforce variables are inside box constraints. + # This is only necessary because of roundoff errors. + x = reinforce_box_boundaries(x, lb, ub) + # Attribute information + stop_cond = 2 + hits_boundary = True + break + + # Check if ``x`` is inside the box and start counter if it is not. + if inside_box_boundaries(x_next, lb, ub): + counter = 0 + else: + counter += 1 + # Whenever outside box constraints keep looking for intersections. + if counter > 0: + _, theta, intersect = box_sphere_intersections(x, alpha*p, lb, ub, + trust_radius) + if intersect: + last_feasible_x = x + theta*alpha*p + # Reinforce variables are inside box constraints. + # This is only necessary because of roundoff errors. + last_feasible_x = reinforce_box_boundaries(last_feasible_x, + lb, ub) + counter = 0 + # Stop after too many infeasible (regarding box constraints) iteration. 
+ if counter > max_infeasible_iter: + break + # Store ``x_next`` value + if return_all: + allvecs.append(x_next) + + # Update residual + r_next = r + alpha*H_p + # Project residual g+ = Z r+ + g_next = Z.dot(r_next) + # Compute conjugate direction step d + rt_g_next = norm(g_next)**2 # g.T g = r.T g (ref [1]_ p.1389) + beta = rt_g_next / rt_g + p = - g_next + beta*p + # Prepare for next iteration + x = x_next + g = g_next + r = g_next + rt_g = norm(g)**2 # g.T g = r.T Z g = r.T g (ref [1]_ p.1389) + H_p = H.dot(p) + + if not inside_box_boundaries(x, lb, ub): + x = last_feasible_x + hits_boundary = True + info = {'niter': k, 'stop_cond': stop_cond, + 'hits_boundary': hits_boundary} + if return_all: + info['allvecs'] = allvecs + return x, info diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/report.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/report.py new file mode 100644 index 0000000000000000000000000000000000000000..5c3b2fb4ef5c90da78ae3f181159140e87393dcf --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/report.py @@ -0,0 +1,51 @@ +"""Progress report printers.""" + +from __future__ import annotations + +class ReportBase: + COLUMN_NAMES: list[str] = NotImplemented + COLUMN_WIDTHS: list[int] = NotImplemented + ITERATION_FORMATS: list[str] = NotImplemented + + @classmethod + def print_header(cls): + fmt = ("|" + + "|".join([f"{{:^{x}}}" for x in cls.COLUMN_WIDTHS]) + + "|") + separators = ['-' * x for x in cls.COLUMN_WIDTHS] + print(fmt.format(*cls.COLUMN_NAMES)) + print(fmt.format(*separators)) + + @classmethod + def print_iteration(cls, *args): + iteration_format = [f"{{:{x}}}" for x in cls.ITERATION_FORMATS] + fmt = "|" + "|".join(iteration_format) + "|" + print(fmt.format(*args)) + + @classmethod + def print_footer(cls): + print() + + +class BasicReport(ReportBase): + COLUMN_NAMES = ["niter", "f evals", "CG iter", "obj func", "tr radius", + "opt", "c viol"] + COLUMN_WIDTHS = [7, 7, 7, 13, 10, 10, 10] + ITERATION_FORMATS = ["^7", "^7", "^7", "^+13.4e", + "^10.2e", "^10.2e", "^10.2e"] + + +class SQPReport(ReportBase): + COLUMN_NAMES = ["niter", "f evals", "CG iter", "obj func", "tr radius", + "opt", "c viol", "penalty", "CG stop"] + COLUMN_WIDTHS = [7, 7, 7, 13, 10, 10, 10, 10, 7] + ITERATION_FORMATS = ["^7", "^7", "^7", "^+13.4e", "^10.2e", "^10.2e", + "^10.2e", "^10.2e", "^7"] + + +class IPReport(ReportBase): + COLUMN_NAMES = ["niter", "f evals", "CG iter", "obj func", "tr radius", + "opt", "c viol", "penalty", "barrier param", "CG stop"] + COLUMN_WIDTHS = [7, 7, 7, 13, 10, 10, 10, 10, 13, 7] + ITERATION_FORMATS = ["^7", "^7", "^7", "^+13.4e", "^10.2e", "^10.2e", + "^10.2e", "^10.2e", "^13.2e", "^7"] diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/__init__.cpython-310.pyc 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b45250fdab37107215d8effd1f20e9e245c0f378 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/__init__.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_canonical_constraint.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_canonical_constraint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..539dded3988715395182c155eca4bd74e4922715 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_canonical_constraint.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_projections.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_projections.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6bca5222d97ab6fc7906cc158a9a916304611c48 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_projections.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_qp_subproblem.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_qp_subproblem.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fca2021a9fe1a1458e14fe4c1b3aef10dd40874b Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_qp_subproblem.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_report.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_report.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15f713b1d7e24fa56a325fb73ab91058223de9f6 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/__pycache__/test_report.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_canonical_constraint.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_canonical_constraint.py new file mode 100644 index 0000000000000000000000000000000000000000..452b327d02da3b3bd3fab9592bdef4d56d6aff57 --- /dev/null +++ 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_canonical_constraint.py @@ -0,0 +1,296 @@ +import numpy as np +from numpy.testing import assert_array_equal, assert_equal +from scipy.optimize._constraints import (NonlinearConstraint, Bounds, + PreparedConstraint) +from scipy.optimize._trustregion_constr.canonical_constraint \ + import CanonicalConstraint, initial_constraints_as_canonical + + +def create_quadratic_function(n, m, rng): + a = rng.rand(m) + A = rng.rand(m, n) + H = rng.rand(m, n, n) + HT = np.transpose(H, (1, 2, 0)) + + def fun(x): + return a + A.dot(x) + 0.5 * H.dot(x).dot(x) + + def jac(x): + return A + H.dot(x) + + def hess(x, v): + return HT.dot(v) + + return fun, jac, hess + + +def test_bounds_cases(): + # Test 1: no constraints. + user_constraint = Bounds(-np.inf, np.inf) + x0 = np.array([-1, 2]) + prepared_constraint = PreparedConstraint(user_constraint, x0, False) + c = CanonicalConstraint.from_PreparedConstraint(prepared_constraint) + + assert_equal(c.n_eq, 0) + assert_equal(c.n_ineq, 0) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, []) + assert_array_equal(c_ineq, []) + + J_eq, J_ineq = c.jac(x0) + assert_array_equal(J_eq, np.empty((0, 2))) + assert_array_equal(J_ineq, np.empty((0, 2))) + + assert_array_equal(c.keep_feasible, []) + + # Test 2: infinite lower bound. + user_constraint = Bounds(-np.inf, [0, np.inf, 1], [False, True, True]) + x0 = np.array([-1, -2, -3], dtype=float) + prepared_constraint = PreparedConstraint(user_constraint, x0, False) + c = CanonicalConstraint.from_PreparedConstraint(prepared_constraint) + + assert_equal(c.n_eq, 0) + assert_equal(c.n_ineq, 2) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, []) + assert_array_equal(c_ineq, [-1, -4]) + + J_eq, J_ineq = c.jac(x0) + assert_array_equal(J_eq, np.empty((0, 3))) + assert_array_equal(J_ineq, np.array([[1, 0, 0], [0, 0, 1]])) + + assert_array_equal(c.keep_feasible, [False, True]) + + # Test 3: infinite upper bound. + user_constraint = Bounds([0, 1, -np.inf], np.inf, [True, False, True]) + x0 = np.array([1, 2, 3], dtype=float) + prepared_constraint = PreparedConstraint(user_constraint, x0, False) + c = CanonicalConstraint.from_PreparedConstraint(prepared_constraint) + + assert_equal(c.n_eq, 0) + assert_equal(c.n_ineq, 2) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, []) + assert_array_equal(c_ineq, [-1, -1]) + + J_eq, J_ineq = c.jac(x0) + assert_array_equal(J_eq, np.empty((0, 3))) + assert_array_equal(J_ineq, np.array([[-1, 0, 0], [0, -1, 0]])) + + assert_array_equal(c.keep_feasible, [True, False]) + + # Test 4: interval constraint. 
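+    # (component 3 has lb == ub, giving one equality constraint; the
+    # finite interval bounds on components 0 and 2 give two
+    # inequalities each)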
+ user_constraint = Bounds([-1, -np.inf, 2, 3], [1, np.inf, 10, 3], + [False, True, True, True]) + x0 = np.array([0, 10, 8, 5]) + prepared_constraint = PreparedConstraint(user_constraint, x0, False) + c = CanonicalConstraint.from_PreparedConstraint(prepared_constraint) + + assert_equal(c.n_eq, 1) + assert_equal(c.n_ineq, 4) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, [2]) + assert_array_equal(c_ineq, [-1, -2, -1, -6]) + + J_eq, J_ineq = c.jac(x0) + assert_array_equal(J_eq, [[0, 0, 0, 1]]) + assert_array_equal(J_ineq, [[1, 0, 0, 0], + [0, 0, 1, 0], + [-1, 0, 0, 0], + [0, 0, -1, 0]]) + + assert_array_equal(c.keep_feasible, [False, True, False, True]) + + +def test_nonlinear_constraint(): + n = 3 + m = 5 + rng = np.random.RandomState(0) + x0 = rng.rand(n) + + fun, jac, hess = create_quadratic_function(n, m, rng) + f = fun(x0) + J = jac(x0) + + lb = [-10, 3, -np.inf, -np.inf, -5] + ub = [10, 3, np.inf, 3, np.inf] + user_constraint = NonlinearConstraint( + fun, lb, ub, jac, hess, [True, False, False, True, False]) + + for sparse_jacobian in [False, True]: + prepared_constraint = PreparedConstraint(user_constraint, x0, + sparse_jacobian) + c = CanonicalConstraint.from_PreparedConstraint(prepared_constraint) + + assert_array_equal(c.n_eq, 1) + assert_array_equal(c.n_ineq, 4) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, [f[1] - lb[1]]) + assert_array_equal(c_ineq, [f[3] - ub[3], lb[4] - f[4], + f[0] - ub[0], lb[0] - f[0]]) + + J_eq, J_ineq = c.jac(x0) + if sparse_jacobian: + J_eq = J_eq.toarray() + J_ineq = J_ineq.toarray() + + assert_array_equal(J_eq, J[1, None]) + assert_array_equal(J_ineq, np.vstack((J[3], -J[4], J[0], -J[0]))) + + v_eq = rng.rand(c.n_eq) + v_ineq = rng.rand(c.n_ineq) + v = np.zeros(m) + v[1] = v_eq[0] + v[3] = v_ineq[0] + v[4] = -v_ineq[1] + v[0] = v_ineq[2] - v_ineq[3] + assert_array_equal(c.hess(x0, v_eq, v_ineq), hess(x0, v)) + + assert_array_equal(c.keep_feasible, [True, False, True, True]) + + +def test_concatenation(): + rng = np.random.RandomState(0) + n = 4 + x0 = rng.rand(n) + + f1 = x0 + J1 = np.eye(n) + lb1 = [-1, -np.inf, -2, 3] + ub1 = [1, np.inf, np.inf, 3] + bounds = Bounds(lb1, ub1, [False, False, True, False]) + + fun, jac, hess = create_quadratic_function(n, 5, rng) + f2 = fun(x0) + J2 = jac(x0) + lb2 = [-10, 3, -np.inf, -np.inf, -5] + ub2 = [10, 3, np.inf, 5, np.inf] + nonlinear = NonlinearConstraint( + fun, lb2, ub2, jac, hess, [True, False, False, True, False]) + + for sparse_jacobian in [False, True]: + bounds_prepared = PreparedConstraint(bounds, x0, sparse_jacobian) + nonlinear_prepared = PreparedConstraint(nonlinear, x0, sparse_jacobian) + + c1 = CanonicalConstraint.from_PreparedConstraint(bounds_prepared) + c2 = CanonicalConstraint.from_PreparedConstraint(nonlinear_prepared) + c = CanonicalConstraint.concatenate([c1, c2], sparse_jacobian) + + assert_equal(c.n_eq, 2) + assert_equal(c.n_ineq, 7) + + c_eq, c_ineq = c.fun(x0) + assert_array_equal(c_eq, [f1[3] - lb1[3], f2[1] - lb2[1]]) + assert_array_equal(c_ineq, [lb1[2] - f1[2], f1[0] - ub1[0], + lb1[0] - f1[0], f2[3] - ub2[3], + lb2[4] - f2[4], f2[0] - ub2[0], + lb2[0] - f2[0]]) + + J_eq, J_ineq = c.jac(x0) + if sparse_jacobian: + J_eq = J_eq.toarray() + J_ineq = J_ineq.toarray() + + assert_array_equal(J_eq, np.vstack((J1[3], J2[1]))) + assert_array_equal(J_ineq, np.vstack((-J1[2], J1[0], -J1[0], J2[3], + -J2[4], J2[0], -J2[0]))) + + v_eq = rng.rand(c.n_eq) + v_ineq = rng.rand(c.n_ineq) + v = np.zeros(5) + v[1] = v_eq[1] + v[3] = v_ineq[3] + v[4] = -v_ineq[4] + v[0] = v_ineq[5] - 
v_ineq[6] + H = c.hess(x0, v_eq, v_ineq).dot(np.eye(n)) + assert_array_equal(H, hess(x0, v)) + + assert_array_equal(c.keep_feasible, + [True, False, False, True, False, True, True]) + + +def test_empty(): + x = np.array([1, 2, 3]) + c = CanonicalConstraint.empty(3) + assert_equal(c.n_eq, 0) + assert_equal(c.n_ineq, 0) + + c_eq, c_ineq = c.fun(x) + assert_array_equal(c_eq, []) + assert_array_equal(c_ineq, []) + + J_eq, J_ineq = c.jac(x) + assert_array_equal(J_eq, np.empty((0, 3))) + assert_array_equal(J_ineq, np.empty((0, 3))) + + H = c.hess(x, None, None).toarray() + assert_array_equal(H, np.zeros((3, 3))) + + +def test_initial_constraints_as_canonical(): + # rng is only used to generate the coefficients of the quadratic + # function that is used by the nonlinear constraint. + rng = np.random.RandomState(0) + + x0 = np.array([0.5, 0.4, 0.3, 0.2]) + n = len(x0) + + lb1 = [-1, -np.inf, -2, 3] + ub1 = [1, np.inf, np.inf, 3] + bounds = Bounds(lb1, ub1, [False, False, True, False]) + + fun, jac, hess = create_quadratic_function(n, 5, rng) + lb2 = [-10, 3, -np.inf, -np.inf, -5] + ub2 = [10, 3, np.inf, 5, np.inf] + nonlinear = NonlinearConstraint( + fun, lb2, ub2, jac, hess, [True, False, False, True, False]) + + for sparse_jacobian in [False, True]: + bounds_prepared = PreparedConstraint(bounds, x0, sparse_jacobian) + nonlinear_prepared = PreparedConstraint(nonlinear, x0, sparse_jacobian) + + f1 = bounds_prepared.fun.f + J1 = bounds_prepared.fun.J + f2 = nonlinear_prepared.fun.f + J2 = nonlinear_prepared.fun.J + + c_eq, c_ineq, J_eq, J_ineq = initial_constraints_as_canonical( + n, [bounds_prepared, nonlinear_prepared], sparse_jacobian) + + assert_array_equal(c_eq, [f1[3] - lb1[3], f2[1] - lb2[1]]) + assert_array_equal(c_ineq, [lb1[2] - f1[2], f1[0] - ub1[0], + lb1[0] - f1[0], f2[3] - ub2[3], + lb2[4] - f2[4], f2[0] - ub2[0], + lb2[0] - f2[0]]) + + if sparse_jacobian: + J1 = J1.toarray() + J2 = J2.toarray() + J_eq = J_eq.toarray() + J_ineq = J_ineq.toarray() + + assert_array_equal(J_eq, np.vstack((J1[3], J2[1]))) + assert_array_equal(J_ineq, np.vstack((-J1[2], J1[0], -J1[0], J2[3], + -J2[4], J2[0], -J2[0]))) + + +def test_initial_constraints_as_canonical_empty(): + n = 3 + for sparse_jacobian in [False, True]: + c_eq, c_ineq, J_eq, J_ineq = initial_constraints_as_canonical( + n, [], sparse_jacobian) + + assert_array_equal(c_eq, []) + assert_array_equal(c_ineq, []) + + if sparse_jacobian: + J_eq = J_eq.toarray() + J_ineq = J_ineq.toarray() + + assert_array_equal(J_eq, np.empty((0, n))) + assert_array_equal(J_ineq, np.empty((0, n))) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_projections.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_projections.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff3c39d649d0ac663d9b71bb906f1daac021118 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_projections.py @@ -0,0 +1,214 @@ +import numpy as np +import scipy.linalg +from scipy.sparse import csc_matrix +from scipy.optimize._trustregion_constr.projections \ + import projections, orthogonality +from numpy.testing import (TestCase, assert_array_almost_equal, + assert_equal, assert_allclose) + +try: + from sksparse.cholmod import cholesky_AAt # noqa: F401 + sksparse_available = True + available_sparse_methods = ("NormalEquation", 
"AugmentedSystem") +except ImportError: + sksparse_available = False + available_sparse_methods = ("AugmentedSystem",) +available_dense_methods = ('QRFactorization', 'SVDFactorization') + + +class TestProjections(TestCase): + + def test_nullspace_and_least_squares_sparse(self): + A_dense = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + At_dense = A_dense.T + A = csc_matrix(A_dense) + test_points = ([1, 2, 3, 4, 5, 6, 7, 8], + [1, 10, 3, 0, 1, 6, 7, 8], + [1.12, 10, 0, 0, 100000, 6, 0.7, 8]) + + for method in available_sparse_methods: + Z, LS, _ = projections(A, method) + for z in test_points: + # Test if x is in the null_space + x = Z.matvec(z) + assert_array_almost_equal(A.dot(x), 0) + # Test orthogonality + assert_array_almost_equal(orthogonality(A, x), 0) + # Test if x is the least square solution + x = LS.matvec(z) + x2 = scipy.linalg.lstsq(At_dense, z)[0] + assert_array_almost_equal(x, x2) + + def test_iterative_refinements_sparse(self): + A_dense = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + A = csc_matrix(A_dense) + test_points = ([1, 2, 3, 4, 5, 6, 7, 8], + [1, 10, 3, 0, 1, 6, 7, 8], + [1.12, 10, 0, 0, 100000, 6, 0.7, 8], + [1, 0, 0, 0, 0, 1, 2, 3+1e-10]) + + for method in available_sparse_methods: + Z, LS, _ = projections(A, method, orth_tol=1e-18, max_refin=100) + for z in test_points: + # Test if x is in the null_space + x = Z.matvec(z) + atol = 1e-13 * abs(x).max() + assert_allclose(A.dot(x), 0, atol=atol) + # Test orthogonality + assert_allclose(orthogonality(A, x), 0, atol=1e-13) + + def test_rowspace_sparse(self): + A_dense = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + A = csc_matrix(A_dense) + test_points = ([1, 2, 3], + [1, 10, 3], + [1.12, 10, 0]) + + for method in available_sparse_methods: + _, _, Y = projections(A, method) + for z in test_points: + # Test if x is solution of A x = z + x = Y.matvec(z) + assert_array_almost_equal(A.dot(x), z) + # Test if x is in the return row space of A + A_ext = np.vstack((A_dense, x)) + assert_equal(np.linalg.matrix_rank(A_dense), + np.linalg.matrix_rank(A_ext)) + + def test_nullspace_and_least_squares_dense(self): + A = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + At = A.T + test_points = ([1, 2, 3, 4, 5, 6, 7, 8], + [1, 10, 3, 0, 1, 6, 7, 8], + [1.12, 10, 0, 0, 100000, 6, 0.7, 8]) + + for method in available_dense_methods: + Z, LS, _ = projections(A, method) + for z in test_points: + # Test if x is in the null_space + x = Z.matvec(z) + assert_array_almost_equal(A.dot(x), 0) + # Test orthogonality + assert_array_almost_equal(orthogonality(A, x), 0) + # Test if x is the least square solution + x = LS.matvec(z) + x2 = scipy.linalg.lstsq(At, z)[0] + assert_array_almost_equal(x, x2) + + def test_compare_dense_and_sparse(self): + D = np.diag(range(1, 101)) + A = np.hstack([D, D, D, D]) + A_sparse = csc_matrix(A) + np.random.seed(0) + + Z, LS, Y = projections(A) + Z_sparse, LS_sparse, Y_sparse = projections(A_sparse) + for k in range(20): + z = np.random.normal(size=(400,)) + assert_array_almost_equal(Z.dot(z), Z_sparse.dot(z)) + assert_array_almost_equal(LS.dot(z), LS_sparse.dot(z)) + x = np.random.normal(size=(100,)) + assert_array_almost_equal(Y.dot(x), Y_sparse.dot(x)) + + def test_compare_dense_and_sparse2(self): + D1 = np.diag([-1.7, 1, 0.5]) + D2 = np.diag([1, -0.6, -0.3]) + D3 = np.diag([-0.3, -1.5, 2]) + A = np.hstack([D1, D2, D3]) + 
A_sparse = csc_matrix(A) + np.random.seed(0) + + Z, LS, Y = projections(A) + Z_sparse, LS_sparse, Y_sparse = projections(A_sparse) + for k in range(1): + z = np.random.normal(size=(9,)) + assert_array_almost_equal(Z.dot(z), Z_sparse.dot(z)) + assert_array_almost_equal(LS.dot(z), LS_sparse.dot(z)) + x = np.random.normal(size=(3,)) + assert_array_almost_equal(Y.dot(x), Y_sparse.dot(x)) + + def test_iterative_refinements_dense(self): + A = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + test_points = ([1, 2, 3, 4, 5, 6, 7, 8], + [1, 10, 3, 0, 1, 6, 7, 8], + [1, 0, 0, 0, 0, 1, 2, 3+1e-10]) + + for method in available_dense_methods: + Z, LS, _ = projections(A, method, orth_tol=1e-18, max_refin=10) + for z in test_points: + # Test if x is in the null_space + x = Z.matvec(z) + assert_allclose(A.dot(x), 0, rtol=0, atol=2.5e-14) + # Test orthogonality + assert_allclose(orthogonality(A, x), 0, rtol=0, atol=5e-16) + + def test_rowspace_dense(self): + A = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + test_points = ([1, 2, 3], + [1, 10, 3], + [1.12, 10, 0]) + + for method in available_dense_methods: + _, _, Y = projections(A, method) + for z in test_points: + # Test if x is solution of A x = z + x = Y.matvec(z) + assert_array_almost_equal(A.dot(x), z) + # Test if x is in the return row space of A + A_ext = np.vstack((A, x)) + assert_equal(np.linalg.matrix_rank(A), + np.linalg.matrix_rank(A_ext)) + + +class TestOrthogonality(TestCase): + + def test_dense_matrix(self): + A = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + test_vectors = ([-1.98931144, -1.56363389, + -0.84115584, 2.2864762, + 5.599141, 0.09286976, + 1.37040802, -0.28145812], + [697.92794044, -4091.65114008, + -3327.42316335, 836.86906951, + 99434.98929065, -1285.37653682, + -4109.21503806, 2935.29289083]) + test_expected_orth = (0, 0) + + for i in range(len(test_vectors)): + x = test_vectors[i] + orth = test_expected_orth[i] + assert_array_almost_equal(orthogonality(A, x), orth) + + def test_sparse_matrix(self): + A = np.array([[1, 2, 3, 4, 0, 5, 0, 7], + [0, 8, 7, 0, 1, 5, 9, 0], + [1, 0, 0, 0, 0, 1, 2, 3]]) + A = csc_matrix(A) + test_vectors = ([-1.98931144, -1.56363389, + -0.84115584, 2.2864762, + 5.599141, 0.09286976, + 1.37040802, -0.28145812], + [697.92794044, -4091.65114008, + -3327.42316335, 836.86906951, + 99434.98929065, -1285.37653682, + -4109.21503806, 2935.29289083]) + test_expected_orth = (0, 0) + + for i in range(len(test_vectors)): + x = test_vectors[i] + orth = test_expected_orth[i] + assert_array_almost_equal(orthogonality(A, x), orth) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_qp_subproblem.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_qp_subproblem.py new file mode 100644 index 0000000000000000000000000000000000000000..e0235caace6c19563efc31fdf4b8e41d9d81819b --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_qp_subproblem.py @@ -0,0 +1,645 @@ +import numpy as np +from scipy.sparse import csc_matrix +from scipy.optimize._trustregion_constr.qp_subproblem \ + import (eqp_kktfact, + projected_cg, + box_intersections, + sphere_intersections, + box_sphere_intersections, + modified_dogleg) +from 
scipy.optimize._trustregion_constr.projections \ + import projections +from numpy.testing import TestCase, assert_array_almost_equal, assert_equal +import pytest + + +class TestEQPDirectFactorization(TestCase): + + # From Example 16.2 Nocedal/Wright "Numerical + # Optimization" p.452. + def test_nocedal_example(self): + H = csc_matrix([[6, 2, 1], + [2, 5, 2], + [1, 2, 4]]) + A = csc_matrix([[1, 0, 1], + [0, 1, 1]]) + c = np.array([-8, -3, -3]) + b = -np.array([3, 0]) + x, lagrange_multipliers = eqp_kktfact(H, c, A, b) + assert_array_almost_equal(x, [2, -1, 1]) + assert_array_almost_equal(lagrange_multipliers, [3, -2]) + + +class TestSphericalBoundariesIntersections(TestCase): + + def test_2d_sphere_constraints(self): + # Interior inicial point + ta, tb, intersect = sphere_intersections([0, 0], + [1, 0], 0.5) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # No intersection between line and circle + ta, tb, intersect = sphere_intersections([2, 0], + [0, 1], 1) + assert_equal(intersect, False) + + # Outside initial point pointing toward outside the circle + ta, tb, intersect = sphere_intersections([2, 0], + [1, 0], 1) + assert_equal(intersect, False) + + # Outside initial point pointing toward inside the circle + ta, tb, intersect = sphere_intersections([2, 0], + [-1, 0], 1.5) + assert_array_almost_equal([ta, tb], [0.5, 1]) + assert_equal(intersect, True) + + # Initial point on the boundary + ta, tb, intersect = sphere_intersections([2, 0], + [1, 0], 2) + assert_array_almost_equal([ta, tb], [0, 0]) + assert_equal(intersect, True) + + def test_2d_sphere_constraints_line_intersections(self): + # Interior initial point + ta, tb, intersect = sphere_intersections([0, 0], + [1, 0], 0.5, + entire_line=True) + assert_array_almost_equal([ta, tb], [-0.5, 0.5]) + assert_equal(intersect, True) + + # No intersection between line and circle + ta, tb, intersect = sphere_intersections([2, 0], + [0, 1], 1, + entire_line=True) + assert_equal(intersect, False) + + # Outside initial point pointing toward outside the circle + ta, tb, intersect = sphere_intersections([2, 0], + [1, 0], 1, + entire_line=True) + assert_array_almost_equal([ta, tb], [-3, -1]) + assert_equal(intersect, True) + + # Outside initial point pointing toward inside the circle + ta, tb, intersect = sphere_intersections([2, 0], + [-1, 0], 1.5, + entire_line=True) + assert_array_almost_equal([ta, tb], [0.5, 3.5]) + assert_equal(intersect, True) + + # Initial point on the boundary + ta, tb, intersect = sphere_intersections([2, 0], + [1, 0], 2, + entire_line=True) + assert_array_almost_equal([ta, tb], [-4, 0]) + assert_equal(intersect, True) + + +class TestBoxBoundariesIntersections(TestCase): + + def test_2d_box_constraints(self): + # Box constraint in the direction of vector d + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [1, 1], [3, 3]) + assert_array_almost_equal([ta, tb], [0.5, 1]) + assert_equal(intersect, True) + + # Negative direction + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [1, -3], [3, -1]) + assert_equal(intersect, False) + + # Some constraints are absent (set to +/- inf) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-np.inf, 1], + [np.inf, np.inf]) + assert_array_almost_equal([ta, tb], [0.5, 1]) + assert_equal(intersect, True) + + # Intersect on the face of the box + ta, tb, intersect = box_intersections([1, 0], [0, 1], + [1, 1], [3, 3]) + assert_array_almost_equal([ta, tb], [1, 1]) + assert_equal(intersect, True) + + # Interior initial point + ta, tb, 
intersect = box_intersections([0, 0], [4, 4], + [-2, -3], [3, 2]) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # No intersection between line and box constraints + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, -3], [-1, -1]) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, 3], [-1, 1]) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, -np.inf], + [-1, np.inf]) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([0, 0], [1, 100], + [1, 1], [3, 3]) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([0.99, 0], [0, 2], + [1, 1], [3, 3]) + assert_equal(intersect, False) + + # Initial point on the boundary + ta, tb, intersect = box_intersections([2, 2], [0, 1], + [-2, -2], [2, 2]) + assert_array_almost_equal([ta, tb], [0, 0]) + assert_equal(intersect, True) + + def test_2d_box_constraints_entire_line(self): + # Box constraint in the direction of vector d + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [1, 1], [3, 3], + entire_line=True) + assert_array_almost_equal([ta, tb], [0.5, 1.5]) + assert_equal(intersect, True) + + # Negative direction + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [1, -3], [3, -1], + entire_line=True) + assert_array_almost_equal([ta, tb], [-1.5, -0.5]) + assert_equal(intersect, True) + + # Some constraints are absent (set to +/- inf) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-np.inf, 1], + [np.inf, np.inf], + entire_line=True) + assert_array_almost_equal([ta, tb], [0.5, np.inf]) + assert_equal(intersect, True) + + # Intersect on the face of the box + ta, tb, intersect = box_intersections([1, 0], [0, 1], + [1, 1], [3, 3], + entire_line=True) + assert_array_almost_equal([ta, tb], [1, 3]) + assert_equal(intersect, True) + + # Interior initial pointoint + ta, tb, intersect = box_intersections([0, 0], [4, 4], + [-2, -3], [3, 2], + entire_line=True) + assert_array_almost_equal([ta, tb], [-0.5, 0.5]) + assert_equal(intersect, True) + + # No intersection between line and box constraints + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, -3], [-1, -1], + entire_line=True) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, 3], [-1, 1], + entire_line=True) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([2, 0], [0, 2], + [-3, -np.inf], + [-1, np.inf], + entire_line=True) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([0, 0], [1, 100], + [1, 1], [3, 3], + entire_line=True) + assert_equal(intersect, False) + ta, tb, intersect = box_intersections([0.99, 0], [0, 2], + [1, 1], [3, 3], + entire_line=True) + assert_equal(intersect, False) + + # Initial point on the boundary + ta, tb, intersect = box_intersections([2, 2], [0, 1], + [-2, -2], [2, 2], + entire_line=True) + assert_array_almost_equal([ta, tb], [-4, 0]) + assert_equal(intersect, True) + + def test_3d_box_constraints(self): + # Simple case + ta, tb, intersect = box_intersections([1, 1, 0], [0, 0, 1], + [1, 1, 1], [3, 3, 3]) + assert_array_almost_equal([ta, tb], [1, 1]) + assert_equal(intersect, True) + + # Negative direction + ta, tb, intersect = box_intersections([1, 1, 0], [0, 0, -1], + [1, 1, 1], [3, 3, 3]) + assert_equal(intersect, False) + + # Interior point + ta, tb, intersect = box_intersections([2, 2, 2], [0, -1, 1], + [1, 1, 1], [3, 3, 3]) + assert_array_almost_equal([ta, tb], [0, 1]) + 
assert_equal(intersect, True) + + def test_3d_box_constraints_entire_line(self): + # Simple case + ta, tb, intersect = box_intersections([1, 1, 0], [0, 0, 1], + [1, 1, 1], [3, 3, 3], + entire_line=True) + assert_array_almost_equal([ta, tb], [1, 3]) + assert_equal(intersect, True) + + # Negative direction + ta, tb, intersect = box_intersections([1, 1, 0], [0, 0, -1], + [1, 1, 1], [3, 3, 3], + entire_line=True) + assert_array_almost_equal([ta, tb], [-3, -1]) + assert_equal(intersect, True) + + # Interior point + ta, tb, intersect = box_intersections([2, 2, 2], [0, -1, 1], + [1, 1, 1], [3, 3, 3], + entire_line=True) + assert_array_almost_equal([ta, tb], [-1, 1]) + assert_equal(intersect, True) + + +class TestBoxSphereBoundariesIntersections(TestCase): + + def test_2d_box_constraints(self): + # Both constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-2, 2], + [-1, -2], [1, 2], 2, + entire_line=False) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # None of the constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-1, 1], + [-1, -3], [1, 3], 10, + entire_line=False) + assert_array_almost_equal([ta, tb], [0, 1]) + assert_equal(intersect, True) + + # Box constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [-1, -3], [1, 3], 10, + entire_line=False) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # Spherical constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [-1, -3], [1, 3], 2, + entire_line=False) + assert_array_almost_equal([ta, tb], [0, 0.25]) + assert_equal(intersect, True) + + # Infeasible problems + ta, tb, intersect = box_sphere_intersections([2, 2], [-4, 4], + [-1, -3], [1, 3], 2, + entire_line=False) + assert_equal(intersect, False) + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [2, 4], [2, 4], 2, + entire_line=False) + assert_equal(intersect, False) + + def test_2d_box_constraints_entire_line(self): + # Both constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-2, 2], + [-1, -2], [1, 2], 2, + entire_line=True) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # None of the constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-1, 1], + [-1, -3], [1, 3], 10, + entire_line=True) + assert_array_almost_equal([ta, tb], [0, 2]) + assert_equal(intersect, True) + + # Box constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [-1, -3], [1, 3], 10, + entire_line=True) + assert_array_almost_equal([ta, tb], [0, 0.5]) + assert_equal(intersect, True) + + # Spherical constraints are active + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [-1, -3], [1, 3], 2, + entire_line=True) + assert_array_almost_equal([ta, tb], [0, 0.25]) + assert_equal(intersect, True) + + # Infeasible problems + ta, tb, intersect = box_sphere_intersections([2, 2], [-4, 4], + [-1, -3], [1, 3], 2, + entire_line=True) + assert_equal(intersect, False) + ta, tb, intersect = box_sphere_intersections([1, 1], [-4, 4], + [2, 4], [2, 4], 2, + entire_line=True) + assert_equal(intersect, False) + + +class TestModifiedDogleg(TestCase): + + def test_cauchypoint_equalsto_newtonpoint(self): + A = np.array([[1, 8]]) + b = np.array([-16]) + _, _, Y = projections(A) + newton_point = np.array([0.24615385, 1.96923077]) + + # Newton point inside boundaries + x = modified_dogleg(A, Y, b, 2, [-np.inf, 
-np.inf], [np.inf, np.inf]) + assert_array_almost_equal(x, newton_point) + + # Spherical constraint active + x = modified_dogleg(A, Y, b, 1, [-np.inf, -np.inf], [np.inf, np.inf]) + assert_array_almost_equal(x, newton_point/np.linalg.norm(newton_point)) + + # Box constraints active + x = modified_dogleg(A, Y, b, 2, [-np.inf, -np.inf], [0.1, np.inf]) + assert_array_almost_equal(x, (newton_point/newton_point[0]) * 0.1) + + def test_3d_example(self): + A = np.array([[1, 8, 1], + [4, 2, 2]]) + b = np.array([-16, 2]) + Z, LS, Y = projections(A) + + newton_point = np.array([-1.37090909, 2.23272727, -0.49090909]) + cauchy_point = np.array([0.11165723, 1.73068711, 0.16748585]) + origin = np.zeros_like(newton_point) + + # newton_point inside boundaries + x = modified_dogleg(A, Y, b, 3, [-np.inf, -np.inf, -np.inf], + [np.inf, np.inf, np.inf]) + assert_array_almost_equal(x, newton_point) + + # line between cauchy_point and newton_point contains best point + # (spherical constraint is active). + x = modified_dogleg(A, Y, b, 2, [-np.inf, -np.inf, -np.inf], + [np.inf, np.inf, np.inf]) + z = cauchy_point + d = newton_point-cauchy_point + t = ((x-z)/(d)) + assert_array_almost_equal(t, np.full(3, 0.40807330)) + assert_array_almost_equal(np.linalg.norm(x), 2) + + # line between cauchy_point and newton_point contains best point + # (box constraint is active). + x = modified_dogleg(A, Y, b, 5, [-1, -np.inf, -np.inf], + [np.inf, np.inf, np.inf]) + z = cauchy_point + d = newton_point-cauchy_point + t = ((x-z)/(d)) + assert_array_almost_equal(t, np.full(3, 0.7498195)) + assert_array_almost_equal(x[0], -1) + + # line between origin and cauchy_point contains best point + # (spherical constraint is active). + x = modified_dogleg(A, Y, b, 1, [-np.inf, -np.inf, -np.inf], + [np.inf, np.inf, np.inf]) + z = origin + d = cauchy_point + t = ((x-z)/(d)) + assert_array_almost_equal(t, np.full(3, 0.573936265)) + assert_array_almost_equal(np.linalg.norm(x), 1) + + # line between origin and newton_point contains best point + # (box constraint is active). + x = modified_dogleg(A, Y, b, 2, [-np.inf, -np.inf, -np.inf], + [np.inf, 1, np.inf]) + z = origin + d = newton_point + t = ((x-z)/(d)) + assert_array_almost_equal(t, np.full(3, 0.4478827364)) + assert_array_almost_equal(x[1], 1) + + +class TestProjectCG(TestCase): + + # From Example 16.2 Nocedal/Wright "Numerical + # Optimization" p.452. 
+ def test_nocedal_example(self): + H = csc_matrix([[6, 2, 1], + [2, 5, 2], + [1, 2, 4]]) + A = csc_matrix([[1, 0, 1], + [0, 1, 1]]) + c = np.array([-8, -3, -3]) + b = -np.array([3, 0]) + Z, _, Y = projections(A) + x, info = projected_cg(H, c, Z, Y, b) + assert_equal(info["stop_cond"], 4) + assert_equal(info["hits_boundary"], False) + assert_array_almost_equal(x, [2, -1, 1]) + + def test_compare_with_direct_fact(self): + H = csc_matrix([[6, 2, 1, 3], + [2, 5, 2, 4], + [1, 2, 4, 5], + [3, 4, 5, 7]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 1, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + Z, _, Y = projections(A) + x, info = projected_cg(H, c, Z, Y, b, tol=0) + x_kkt, _ = eqp_kktfact(H, c, A, b) + assert_equal(info["stop_cond"], 1) + assert_equal(info["hits_boundary"], False) + assert_array_almost_equal(x, x_kkt) + + def test_trust_region_infeasible(self): + H = csc_matrix([[6, 2, 1, 3], + [2, 5, 2, 4], + [1, 2, 4, 5], + [3, 4, 5, 7]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 1, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + trust_radius = 1 + Z, _, Y = projections(A) + with pytest.raises(ValueError): + projected_cg(H, c, Z, Y, b, trust_radius=trust_radius) + + def test_trust_region_barely_feasible(self): + H = csc_matrix([[6, 2, 1, 3], + [2, 5, 2, 4], + [1, 2, 4, 5], + [3, 4, 5, 7]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 1, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + trust_radius = 2.32379000772445021283 + Z, _, Y = projections(A) + x, info = projected_cg(H, c, Z, Y, b, + tol=0, + trust_radius=trust_radius) + assert_equal(info["stop_cond"], 2) + assert_equal(info["hits_boundary"], True) + assert_array_almost_equal(np.linalg.norm(x), trust_radius) + assert_array_almost_equal(x, -Y.dot(b)) + + def test_hits_boundary(self): + H = csc_matrix([[6, 2, 1, 3], + [2, 5, 2, 4], + [1, 2, 4, 5], + [3, 4, 5, 7]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 1, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + trust_radius = 3 + Z, _, Y = projections(A) + x, info = projected_cg(H, c, Z, Y, b, + tol=0, + trust_radius=trust_radius) + assert_equal(info["stop_cond"], 2) + assert_equal(info["hits_boundary"], True) + assert_array_almost_equal(np.linalg.norm(x), trust_radius) + + def test_negative_curvature_unconstrained(self): + H = csc_matrix([[1, 2, 1, 3], + [2, 0, 2, 4], + [1, 2, 0, 2], + [3, 4, 2, 0]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 0, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + Z, _, Y = projections(A) + with pytest.raises(ValueError): + projected_cg(H, c, Z, Y, b, tol=0) + + def test_negative_curvature(self): + H = csc_matrix([[1, 2, 1, 3], + [2, 0, 2, 4], + [1, 2, 0, 2], + [3, 4, 2, 0]]) + A = csc_matrix([[1, 0, 1, 0], + [0, 1, 0, 1]]) + c = np.array([-2, -3, -3, 1]) + b = -np.array([3, 0]) + Z, _, Y = projections(A) + trust_radius = 1000 + x, info = projected_cg(H, c, Z, Y, b, + tol=0, + trust_radius=trust_radius) + assert_equal(info["stop_cond"], 3) + assert_equal(info["hits_boundary"], True) + assert_array_almost_equal(np.linalg.norm(x), trust_radius) + + # The box constraints are inactive at the solution but + # are active during the iterations. 
+    def test_inactive_box_constraints(self):
+        H = csc_matrix([[6, 2, 1, 3],
+                        [2, 5, 2, 4],
+                        [1, 2, 4, 5],
+                        [3, 4, 5, 7]])
+        A = csc_matrix([[1, 0, 1, 0],
+                        [0, 1, 1, 1]])
+        c = np.array([-2, -3, -3, 1])
+        b = -np.array([3, 0])
+        Z, _, Y = projections(A)
+        x, info = projected_cg(H, c, Z, Y, b,
+                               tol=0,
+                               lb=[0.5, -np.inf,
+                                   -np.inf, -np.inf],
+                               return_all=True)
+        x_kkt, _ = eqp_kktfact(H, c, A, b)
+        assert_equal(info["stop_cond"], 1)
+        assert_equal(info["hits_boundary"], False)
+        assert_array_almost_equal(x, x_kkt)
+
+    # The box constraints are active and the termination is
+    # by maximum iterations (infeasible iterations).
+    def test_active_box_constraints_maximum_iterations_reached(self):
+        H = csc_matrix([[6, 2, 1, 3],
+                        [2, 5, 2, 4],
+                        [1, 2, 4, 5],
+                        [3, 4, 5, 7]])
+        A = csc_matrix([[1, 0, 1, 0],
+                        [0, 1, 1, 1]])
+        c = np.array([-2, -3, -3, 1])
+        b = -np.array([3, 0])
+        Z, _, Y = projections(A)
+        x, info = projected_cg(H, c, Z, Y, b,
+                               tol=0,
+                               lb=[0.8, -np.inf,
+                                   -np.inf, -np.inf],
+                               return_all=True)
+        assert_equal(info["stop_cond"], 1)
+        assert_equal(info["hits_boundary"], True)
+        assert_array_almost_equal(A.dot(x), -b)
+        assert_array_almost_equal(x[0], 0.8)
+
+    # The box constraints are active and the termination is
+    # because it hits boundary (without infeasible iterations).
+    def test_active_box_constraints_hits_boundaries(self):
+        H = csc_matrix([[6, 2, 1, 3],
+                        [2, 5, 2, 4],
+                        [1, 2, 4, 5],
+                        [3, 4, 5, 7]])
+        A = csc_matrix([[1, 0, 1, 0],
+                        [0, 1, 1, 1]])
+        c = np.array([-2, -3, -3, 1])
+        b = -np.array([3, 0])
+        trust_radius = 3
+        Z, _, Y = projections(A)
+        x, info = projected_cg(H, c, Z, Y, b,
+                               tol=0,
+                               ub=[np.inf, np.inf, 1.6, np.inf],
+                               trust_radius=trust_radius,
+                               return_all=True)
+        assert_equal(info["stop_cond"], 2)
+        assert_equal(info["hits_boundary"], True)
+        assert_array_almost_equal(x[2], 1.6)
+
+    # The box constraints are active and the termination is
+    # because it hits boundary (infeasible iterations).
+    def test_active_box_constraints_hits_boundaries_infeasible_iter(self):
+        H = csc_matrix([[6, 2, 1, 3],
+                        [2, 5, 2, 4],
+                        [1, 2, 4, 5],
+                        [3, 4, 5, 7]])
+        A = csc_matrix([[1, 0, 1, 0],
+                        [0, 1, 1, 1]])
+        c = np.array([-2, -3, -3, 1])
+        b = -np.array([3, 0])
+        trust_radius = 4
+        Z, _, Y = projections(A)
+        x, info = projected_cg(H, c, Z, Y, b,
+                               tol=0,
+                               ub=[np.inf, 0.1, np.inf, np.inf],
+                               trust_radius=trust_radius,
+                               return_all=True)
+        assert_equal(info["stop_cond"], 2)
+        assert_equal(info["hits_boundary"], True)
+        assert_array_almost_equal(x[1], 0.1)
+
+    # The box constraints are active and the termination is
+    # due to negative curvature (no infeasible iterations).
+    def test_active_box_constraints_negative_curvature(self):
+        H = csc_matrix([[1, 2, 1, 3],
+                        [2, 0, 2, 4],
+                        [1, 2, 0, 2],
+                        [3, 4, 2, 0]])
+        A = csc_matrix([[1, 0, 1, 0],
+                        [0, 1, 0, 1]])
+        c = np.array([-2, -3, -3, 1])
+        b = -np.array([3, 0])
+        Z, _, Y = projections(A)
+        trust_radius = 1000
+        x, info = projected_cg(H, c, Z, Y, b,
+                               tol=0,
+                               ub=[np.inf, np.inf, 100, np.inf],
+                               trust_radius=trust_radius)
+        assert_equal(info["stop_cond"], 3)
+        assert_equal(info["hits_boundary"], True)
+        assert_array_almost_equal(x[2], 100)
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_report.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_report.py
new file mode 100644
index 0000000000000000000000000000000000000000..c82796fea723ab043971564306d4b76bdf9f0380
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tests/test_report.py
@@ -0,0 +1,34 @@
+import pytest
+import numpy as np
+from scipy.optimize import minimize, Bounds
+
+def test_gh10880():
+    # checks that verbose reporting works with trust-constr for
+    # bound-constrained problems
+    bnds = Bounds(1, 2)
+    opts = {'maxiter': 1000, 'verbose': 2}
+    minimize(lambda x: x**2, x0=2., method='trust-constr',
+             bounds=bnds, options=opts)
+
+    opts = {'maxiter': 1000, 'verbose': 3}
+    minimize(lambda x: x**2, x0=2., method='trust-constr',
+             bounds=bnds, options=opts)
+
+@pytest.mark.xslow
+def test_gh12922():
+    # checks that verbose reporting works with trust-constr for
+    # general constraints
+    def objective(x):
+        return np.array([(np.sum((x+1)**4))])
+
+    cons = {'type': 'ineq', 'fun': lambda x: -x[0]**2}
+    n = 25
+    x0 = np.linspace(-5, 5, n)
+
+    opts = {'maxiter': 1000, 'verbose': 2}
+    minimize(objective, x0=x0, method='trust-constr',
+             constraints=cons, options=opts)
+
+    opts = {'maxiter': 1000, 'verbose': 3}
+    minimize(objective, x0=x0, method='trust-constr',
+             constraints=cons, options=opts)
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py
new file mode 100644
index 0000000000000000000000000000000000000000..121143fad2a8df3a8986beffc5043622d9ace993
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py
@@ -0,0 +1,346 @@
+"""Trust-region interior point method.
+
+References
+----------
+.. [1] Byrd, Richard H., Mary E. Hribar, and Jorge Nocedal.
+       "An interior point algorithm for large-scale nonlinear
+       programming." SIAM Journal on Optimization 9.4 (1999): 877-900.
+.. [2] Byrd, Richard H., Guanghui Liu, and Jorge Nocedal.
+       "On the local behavior of an interior point method for
+       nonlinear programming." Numerical analysis 1997 (1997): 37-56.
+.. [3] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
+       Second Edition (2006).
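+
+Outline of the outer loop implemented below (an informal sketch, not a
+restatement of [1]_): each barrier subproblem
+
+    minimize    fun(x) - barrier_parameter*sum(log(s))
+    subject to: constr_eq(x) = 0 and constr_ineq(x) + s = 0
+
+is solved approximately by SQP iterations, after which both
+``barrier_parameter`` and the subproblem tolerance are decreased
+geometrically (``BARRIER_DECAY_RATIO``) until the global stop criteria
+are satisfied.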
+""" + +import scipy.sparse as sps +import numpy as np +from .equality_constrained_sqp import equality_constrained_sqp +from scipy.sparse.linalg import LinearOperator + +__all__ = ['tr_interior_point'] + + +class BarrierSubproblem: + """ + Barrier optimization problem: + minimize fun(x) - barrier_parameter*sum(log(s)) + subject to: constr_eq(x) = 0 + constr_ineq(x) + s = 0 + """ + + def __init__(self, x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, + constr, jac, barrier_parameter, tolerance, + enforce_feasibility, global_stop_criteria, + xtol, fun0, grad0, constr_ineq0, jac_ineq0, constr_eq0, + jac_eq0): + # Store parameters + self.n_vars = n_vars + self.x0 = x0 + self.s0 = s0 + self.fun = fun + self.grad = grad + self.lagr_hess = lagr_hess + self.constr = constr + self.jac = jac + self.barrier_parameter = barrier_parameter + self.tolerance = tolerance + self.n_eq = n_eq + self.n_ineq = n_ineq + self.enforce_feasibility = enforce_feasibility + self.global_stop_criteria = global_stop_criteria + self.xtol = xtol + self.fun0 = self._compute_function(fun0, constr_ineq0, s0) + self.grad0 = self._compute_gradient(grad0) + self.constr0 = self._compute_constr(constr_ineq0, constr_eq0, s0) + self.jac0 = self._compute_jacobian(jac_eq0, jac_ineq0, s0) + self.terminate = False + + def update(self, barrier_parameter, tolerance): + self.barrier_parameter = barrier_parameter + self.tolerance = tolerance + + def get_slack(self, z): + return z[self.n_vars:self.n_vars+self.n_ineq] + + def get_variables(self, z): + return z[:self.n_vars] + + def function_and_constraints(self, z): + """Returns barrier function and constraints at given point. + + For z = [x, s], returns barrier function: + function(z) = fun(x) - barrier_parameter*sum(log(s)) + and barrier constraints: + constraints(z) = [ constr_eq(x) ] + [ constr_ineq(x) + s ] + + """ + # Get variables and slack variables + x = self.get_variables(z) + s = self.get_slack(z) + # Compute function and constraints + f = self.fun(x) + c_eq, c_ineq = self.constr(x) + # Return objective function and constraints + return (self._compute_function(f, c_ineq, s), + self._compute_constr(c_ineq, c_eq, s)) + + def _compute_function(self, f, c_ineq, s): + # Use technique from Nocedal and Wright book, ref [3]_, p.576, + # to guarantee constraints from `enforce_feasibility` + # stay feasible along iterations. + s[self.enforce_feasibility] = -c_ineq[self.enforce_feasibility] + log_s = [np.log(s_i) if s_i > 0 else -np.inf for s_i in s] + # Compute barrier objective function + return f - self.barrier_parameter*np.sum(log_s) + + def _compute_constr(self, c_ineq, c_eq, s): + # Compute barrier constraint + return np.hstack((c_eq, + c_ineq + s)) + + def scaling(self, z): + """Returns scaling vector. + Given by: + scaling = [ones(n_vars), s] + """ + s = self.get_slack(z) + diag_elements = np.hstack((np.ones(self.n_vars), s)) + + # Diagonal matrix + def matvec(vec): + return diag_elements*vec + return LinearOperator((self.n_vars+self.n_ineq, + self.n_vars+self.n_ineq), + matvec) + + def gradient_and_jacobian(self, z): + """Returns scaled gradient. + + Return scaled gradient: + gradient = [ grad(x) ] + [ -barrier_parameter*ones(n_ineq) ] + and scaled Jacobian matrix: + jacobian = [ jac_eq(x) 0 ] + [ jac_ineq(x) S ] + Both of them scaled by the previously defined scaling factor. 
+ """ + # Get variables and slack variables + x = self.get_variables(z) + s = self.get_slack(z) + # Compute first derivatives + g = self.grad(x) + J_eq, J_ineq = self.jac(x) + # Return gradient and Jacobian + return (self._compute_gradient(g), + self._compute_jacobian(J_eq, J_ineq, s)) + + def _compute_gradient(self, g): + return np.hstack((g, -self.barrier_parameter*np.ones(self.n_ineq))) + + def _compute_jacobian(self, J_eq, J_ineq, s): + if self.n_ineq == 0: + return J_eq + else: + if sps.issparse(J_eq) or sps.issparse(J_ineq): + # It is expected that J_eq and J_ineq + # are already `csr_matrix` because of + # the way ``BoxConstraint``, ``NonlinearConstraint`` + # and ``LinearConstraint`` are defined. + J_eq = sps.csr_matrix(J_eq) + J_ineq = sps.csr_matrix(J_ineq) + return self._assemble_sparse_jacobian(J_eq, J_ineq, s) + else: + S = np.diag(s) + zeros = np.zeros((self.n_eq, self.n_ineq)) + # Convert to matrix + if sps.issparse(J_ineq): + J_ineq = J_ineq.toarray() + if sps.issparse(J_eq): + J_eq = J_eq.toarray() + # Concatenate matrices + return np.block([[J_eq, zeros], + [J_ineq, S]]) + + def _assemble_sparse_jacobian(self, J_eq, J_ineq, s): + """Assemble sparse Jacobian given its components. + + Given ``J_eq``, ``J_ineq`` and ``s`` returns: + jacobian = [ J_eq, 0 ] + [ J_ineq, diag(s) ] + + It is equivalent to: + sps.bmat([[ J_eq, None ], + [ J_ineq, diag(s) ]], "csr") + but significantly more efficient for this + given structure. + """ + n_vars, n_ineq, n_eq = self.n_vars, self.n_ineq, self.n_eq + J_aux = sps.vstack([J_eq, J_ineq], "csr") + indptr, indices, data = J_aux.indptr, J_aux.indices, J_aux.data + new_indptr = indptr + np.hstack((np.zeros(n_eq, dtype=int), + np.arange(n_ineq+1, dtype=int))) + size = indices.size+n_ineq + new_indices = np.empty(size) + new_data = np.empty(size) + mask = np.full(size, False, bool) + mask[new_indptr[-n_ineq:]-1] = True + new_indices[mask] = n_vars+np.arange(n_ineq) + new_indices[~mask] = indices + new_data[mask] = s + new_data[~mask] = data + J = sps.csr_matrix((new_data, new_indices, new_indptr), + (n_eq + n_ineq, n_vars + n_ineq)) + return J + + def lagrangian_hessian_x(self, z, v): + """Returns Lagrangian Hessian (in relation to `x`) -> Hx""" + x = self.get_variables(z) + # Get lagrange multipliers related to nonlinear equality constraints + v_eq = v[:self.n_eq] + # Get lagrange multipliers related to nonlinear ineq. constraints + v_ineq = v[self.n_eq:self.n_eq+self.n_ineq] + lagr_hess = self.lagr_hess + return lagr_hess(x, v_eq, v_ineq) + + def lagrangian_hessian_s(self, z, v): + """Returns scaled Lagrangian Hessian (in relation to`s`) -> S Hs S""" + s = self.get_slack(z) + # Using the primal formulation: + # S Hs S = diag(s)*diag(barrier_parameter/s**2)*diag(s). + # Reference [1]_ p. 882, formula (3.1) + primal = self.barrier_parameter + # Using the primal-dual formulation + # S Hs S = diag(s)*diag(v/s)*diag(s) + # Reference [1]_ p. 883, formula (3.11) + primal_dual = v[-self.n_ineq:]*s + # Uses the primal-dual formulation for + # positives values of v_ineq, and primal + # formulation for the remaining ones. 
+        return np.where(v[-self.n_ineq:] > 0, primal_dual, primal)
+
+    def lagrangian_hessian(self, z, v):
+        """Returns scaled Lagrangian Hessian"""
+        # Compute Hessian in relation to x and s
+        Hx = self.lagrangian_hessian_x(z, v)
+        if self.n_ineq > 0:
+            S_Hs_S = self.lagrangian_hessian_s(z, v)
+
+        # The scaled Lagrangian Hessian is:
+        #     [ Hx    0     ]
+        #     [ 0   S Hs S  ]
+        def matvec(vec):
+            vec_x = self.get_variables(vec)
+            vec_s = self.get_slack(vec)
+            if self.n_ineq > 0:
+                return np.hstack((Hx.dot(vec_x), S_Hs_S*vec_s))
+            else:
+                return Hx.dot(vec_x)
+        return LinearOperator((self.n_vars+self.n_ineq,
+                               self.n_vars+self.n_ineq),
+                              matvec)
+
+    def stop_criteria(self, state, z, last_iteration_failed,
+                      optimality, constr_violation,
+                      trust_radius, penalty, cg_info):
+        """Stop criteria for the barrier problem.
+
+        The criterion proposed here is similar to formula (2.3)
+        of [1]_, p.879.
+        """
+        x = self.get_variables(z)
+        if self.global_stop_criteria(state, x,
+                                     last_iteration_failed,
+                                     trust_radius, penalty,
+                                     cg_info,
+                                     self.barrier_parameter,
+                                     self.tolerance):
+            self.terminate = True
+            return True
+        else:
+            g_cond = (optimality < self.tolerance and
+                      constr_violation < self.tolerance)
+            x_cond = trust_radius < self.xtol
+            return g_cond or x_cond
+
+
+def tr_interior_point(fun, grad, lagr_hess, n_vars, n_ineq, n_eq,
+                      constr, jac, x0, fun0, grad0,
+                      constr_ineq0, jac_ineq0, constr_eq0,
+                      jac_eq0, stop_criteria,
+                      enforce_feasibility, xtol, state,
+                      initial_barrier_parameter,
+                      initial_tolerance,
+                      initial_penalty,
+                      initial_trust_radius,
+                      factorization_method):
+    """Trust-region interior point method.
+
+    Solve problem:
+        minimize fun(x)
+        subject to: constr_ineq(x) <= 0
+                    constr_eq(x) = 0
+    using the trust-region interior point method described in [1]_.
+    """
+    # BOUNDARY_PARAMETER controls the decrease on the slack
+    # variables. Represents ``tau`` from [1]_ p.885, formula (3.18).
+    BOUNDARY_PARAMETER = 0.995
+    # BARRIER_DECAY_RATIO controls the decay of the barrier parameter
+    # and of the subproblem tolerance. Represents ``theta`` from [1]_ p.879.
+    BARRIER_DECAY_RATIO = 0.2
+    # TRUST_ENLARGEMENT controls the enlargement of the trust radius
+    # after each iteration.
+    TRUST_ENLARGEMENT = 5
+
+    # Default enforce_feasibility
+    if enforce_feasibility is None:
+        enforce_feasibility = np.zeros(n_ineq, bool)
+    # Initial Values
+    barrier_parameter = initial_barrier_parameter
+    tolerance = initial_tolerance
+    trust_radius = initial_trust_radius
+    # Define initial value for the slack variables
+    s0 = np.maximum(-1.5*constr_ineq0, np.ones(n_ineq))
+    # Define barrier subproblem
+    subprob = BarrierSubproblem(
+        x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac,
+        barrier_parameter, tolerance, enforce_feasibility,
+        stop_criteria, xtol, fun0, grad0, constr_ineq0, jac_ineq0,
+        constr_eq0, jac_eq0)
+    # Define initial parameters for the first iteration.
+    z = np.hstack((x0, s0))
+    fun0_subprob, constr0_subprob = subprob.fun0, subprob.constr0
+    grad0_subprob, jac0_subprob = subprob.grad0, subprob.jac0
+    # Define trust region bounds
+    trust_lb = np.hstack((np.full(subprob.n_vars, -np.inf),
+                          np.full(subprob.n_ineq, -BOUNDARY_PARAMETER)))
+    trust_ub = np.full(subprob.n_vars+subprob.n_ineq, np.inf)
+
+    # Solve a sequence of barrier problems
+    while True:
+        # Solve SQP subproblem
+        z, state = equality_constrained_sqp(
+            subprob.function_and_constraints,
+            subprob.gradient_and_jacobian,
+            subprob.lagrangian_hessian,
+            z, fun0_subprob, grad0_subprob,
+            constr0_subprob, jac0_subprob, subprob.stop_criteria,
+            state, initial_penalty, trust_radius,
+            factorization_method, trust_lb, trust_ub, subprob.scaling)
+        if subprob.terminate:
+            break
+        # Update parameters
+        trust_radius = max(initial_trust_radius,
+                           TRUST_ENLARGEMENT*state.tr_radius)
+        # TODO: Use more advanced strategies from [2]_
+        # to update these parameters.
+        barrier_parameter *= BARRIER_DECAY_RATIO
+        tolerance *= BARRIER_DECAY_RATIO
+        # Update Barrier Problem
+        subprob.update(barrier_parameter, tolerance)
+        # Compute initial values for next iteration
+        fun0_subprob, constr0_subprob = subprob.function_and_constraints(z)
+        grad0_subprob, jac0_subprob = subprob.gradient_and_jacobian(z)
+
+    # Get x and s
+    x = subprob.get_variables(z)
+    return x, state
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__init__.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/__init__.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db60b89b70ac0048b6945b6ee69f80fe58af461a
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/__init__.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__basinhopping.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__basinhopping.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db8fe4eef133bb28c8fa981461fa12230049c76c
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__basinhopping.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__differential_evolution.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__differential_evolution.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..144125afadceb39d3d7d4b68ec1b083674c9dd1b
Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__differential_evolution.cpython-310.pyc differ
diff --git
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__dual_annealing.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__dual_annealing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8681bfdb2b5a5e6a2757053020cf511f54d42e6 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__dual_annealing.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__linprog_clean_inputs.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__linprog_clean_inputs.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba7ac446051be93d49d0b77398c5c70fd9b25502 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__linprog_clean_inputs.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__numdiff.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__numdiff.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a5ec63f010e9db4cd9e920c4adc4bea80c15cf0 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__numdiff.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__remove_redundancy.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__remove_redundancy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12d7a5380f2d22618b9b79cd2bff47aebd2df815 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__remove_redundancy.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__root.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__root.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a96fe0a3d39aa0e7286cf55ff10b07a2b761aefc Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__root.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__shgo.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__shgo.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f21a2551b80d3463e2a267518666dd2f73b20438 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__shgo.cpython-310.pyc differ diff
--git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__spectral.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__spectral.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b31b06bd6366ad925f3fb8fbb9741f0b2ffd297 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test__spectral.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_bracket.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_bracket.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0337b4b57329ca6edcd9ca527c52d4e432cd8ae9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_bracket.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_chandrupatla.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_chandrupatla.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..826139afc7d92cd2a8a9343f1d24d3c9a15638c9 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_chandrupatla.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyla.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyla.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e2ba45ba754993d870e4e2130d4f28ae58c8762 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyla.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyqa.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyqa.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f36275297f198ac6997e7ece48e99b156c7054 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cobyqa.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraint_conversion.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraint_conversion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0135a4b1be99031c6028ba11b3c426fe675987b2 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraint_conversion.cpython-310.pyc differ diff --git 
a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraints.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraints.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81a9cef9557180d900f55657cba2a99d3c21cc25 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_constraints.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cython_optimize.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cython_optimize.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..864b689de0c6a564ca35f73ab0893df9b74a8e69 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_cython_optimize.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiable_functions.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiable_functions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..401bebb9603e17186d9311e2848d72f59e9bca5e Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiable_functions.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiate.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bacbe6cfbd6847e00d0cae45fdf1ba956ccfc02 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_differentiate.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_direct.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_direct.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86ef6646668c15a0c92c27bdadfeafcfc2edde5b Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_direct.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_extending.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_extending.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c0f05bada377322ca2930bd8b1f18bf30321db Binary files /dev/null and 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_extending.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_hessian_update_strategy.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_hessian_update_strategy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d5e3b07a9ed9d80858183e35fd104be34b4bafe Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_hessian_update_strategy.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_isotonic_regression.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_isotonic_regression.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f46db760650a04930bbe6655fa65cfb31ebdc83f Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_isotonic_regression.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_hessinv.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_hessinv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..962ed38004d14aa37c6b2a418d69c4929928a8bb Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_hessinv.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_setulb.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_setulb.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..102670e58e3f52a38486d80b6135fcc8a6f76ebf Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lbfgsb_setulb.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_least_squares.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_least_squares.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c18fdf3a851c9ee930334d5cdbcd4d1014b86de Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_least_squares.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linear_assignment.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linear_assignment.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f9feea6a2ad9f9136b994b968871986a9fd41a12 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linear_assignment.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linesearch.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linesearch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5241ee5af44fb032fe51a3339ce3cd3bbfc6fa8b Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linesearch.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linprog.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linprog.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62273d613c2ffa27d08efd46c23e30f515d18b24 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_linprog.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_common.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50d58fe4d824668431a48c65fe84e3a6116d43c1 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_common.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_linear.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_linear.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..370c2136296ae121373aa3cfdd0e3b6219d74ba2 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_lsq_linear.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_milp.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_milp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b01fcd0695bb3d9265863ef9f666feff2581166a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_milp.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minimize_constrained.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minimize_constrained.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..2b45f80932aecf191d4b23068c24139586a83f3c Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minimize_constrained.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minpack.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minpack.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7d93abbdad0d7f06c6ad2138d1371a274f17a54 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_minpack.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nnls.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nnls.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93d0d1ff9811d4fd9511a9d3908161bd8818dfb7 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nnls.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nonlin.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nonlin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9cbbbfa9e3d6c44b2917a5ccd356380de3f39103 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_nonlin.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_optimize.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_optimize.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c710aeda26e31af45054cce40fcd4708e951e937 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_optimize.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_quadratic_assignment.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_quadratic_assignment.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ead7498921cb48ee1b433a039df00140ae6a48da Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_quadratic_assignment.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_regression.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_regression.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..4c4dad4944874e7fc84fbba8b93ec94341961f51 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_regression.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_slsqp.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_slsqp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6ace0779f952891358b0e7b47281d7bc2cd74cb Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_slsqp.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_tnc.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_tnc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b6d2cbfbb10442e05047e36323a6847a7054495 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_tnc.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc3621d82382286c2984b6bd4e1d0429d52f7810 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_exact.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_exact.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d26a456628fe5b6d9c41912c341082692f8f298a Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_exact.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_krylov.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_krylov.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60bbd12e526deb91b2b6f0e95b1f3e3471e74f65 Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_trustregion_krylov.cpython-310.pyc differ diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_zeros.cpython-310.pyc b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_zeros.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..bb11471f67d415f8058fbd6aa0aae2caf6d193cb Binary files /dev/null and b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/__pycache__/test_zeros.cpython-310.pyc differ
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/extending.pyx b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/extending.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..d831b3c7f5dcaee71371027c7ee95aa9ee51d157
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/extending.pyx
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+#cython: language_level=3
+#cython: boundscheck=False
+#cython: wraparound=False
+"""
+Taken from docstring for scipy.optimize.cython_optimize module.
+"""
+
+from scipy.optimize.cython_optimize cimport brentq
+
+# import math from Cython
+from libc cimport math
+
+myargs = {'C0': 1.0, 'C1': 0.7}  # a dictionary of extra arguments
+XLO, XHI = 0.5, 1.0  # lower and upper search boundaries
+XTOL, RTOL, MITR = 1e-3, 1e-3, 10  # other solver parameters
+
+# user-defined struct for extra parameters
+ctypedef struct test_params:
+    double C0
+    double C1
+
+
+# user-defined callback
+cdef double f(double x, void *args) noexcept:
+    cdef test_params *myargs = <test_params *> args
+    return myargs.C0 - math.exp(-(x - myargs.C1))
+
+
+# Cython wrapper function
+cdef double brentq_wrapper_example(dict args, double xa, double xb,
+                                   double xtol, double rtol, int mitr):
+    # Cython automatically casts dictionary to struct
+    cdef test_params myargs = args
+    return brentq(
+        f, xa, xb, &myargs, xtol, rtol, mitr, NULL)
+
+
+# Python function
+def brentq_example(args=myargs, xa=XLO, xb=XHI, xtol=XTOL, rtol=RTOL,
+                   mitr=MITR):
+    '''Calls Cython wrapper from Python.'''
+    return brentq_wrapper_example(args, xa, xb, xtol, rtol, mitr)
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/meson.build b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/meson.build
new file mode 100644
index 0000000000000000000000000000000000000000..2a5e1535a16f840f31ca0207513e7c060767ea12
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/_cython_examples/meson.build
@@ -0,0 +1,25 @@
+project('random-build-examples', 'c', 'cpp', 'cython')
+
+fs = import('fs')
+
+py3 = import('python').find_installation(pure: false)
+
+cy = meson.get_compiler('cython')
+
+if not cy.version().version_compare('>=3.0.8')
+    error('tests requires Cython >= 3.0.8')
+endif
+
+py3.extension_module(
+    'extending',
+    'extending.pyx',
+    install: false,
+)
+
+extending_cpp = fs.copyfile('extending.pyx', 'extending_cpp.pyx')
+py3.extension_module(
+    'extending_cpp',
+    extending_cpp,
+    install: false,
+    override_options : ['cython_language=cpp']
+)
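The `extending.pyx` example above exercises the C-level `brentq` from `scipy.optimize.cython_optimize`. For reference, here is a sketch of the same root-find written against the ordinary Python-level `scipy.optimize.brentq`, using the same callback, bracket, and tolerances as the example; nothing below is part of this diff:

```python
import math
from scipy.optimize import brentq

C0, C1 = 1.0, 0.7  # same extra arguments as ``myargs`` above

def f(x):
    # same callback as the Cython example: C0 - exp(-(x - C1))
    return C0 - math.exp(-(x - C1))

# same bracket [XLO, XHI] and tolerances as above
root = brentq(f, 0.5, 1.0, xtol=1e-3, rtol=1e-3, maxiter=10)
print(root)
```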
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__basinhopping.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__basinhopping.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fbd376ac2c1387546a4134cbd34a9a3ac888835
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__basinhopping.py
@@ -0,0 +1,529 @@
+""" +Unit tests for the basin hopping global minimization algorithm. +""" +import copy + +from numpy.testing import (assert_almost_equal, assert_equal, assert_, + assert_allclose) +import pytest +from pytest import raises as assert_raises +import numpy as np +from numpy import cos, sin + +from scipy.optimize import basinhopping, OptimizeResult +from scipy.optimize._basinhopping import ( + Storage, RandomDisplacement, Metropolis, AdaptiveStepsize) + + +def func1d(x): + f = cos(14.5 * x - 0.3) + (x + 0.2) * x + df = np.array(-14.5 * sin(14.5 * x - 0.3) + 2. * x + 0.2) + return f, df + + +def func2d_nograd(x): + f = cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] + 0.2) * x[0] + return f + + +def func2d(x): + f = cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] + 0.2) * x[0] + df = np.zeros(2) + df[0] = -14.5 * sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2 + df[1] = 2. * x[1] + 0.2 + return f, df + + +def func2d_easyderiv(x): + f = 2.0*x[0]**2 + 2.0*x[0]*x[1] + 2.0*x[1]**2 - 6.0*x[0] + df = np.zeros(2) + df[0] = 4.0*x[0] + 2.0*x[1] - 6.0 + df[1] = 2.0*x[0] + 4.0*x[1] + + return f, df + + +class MyTakeStep1(RandomDisplacement): + """use a copy of displace, but have it set a special parameter to + make sure it's actually being used.""" + def __init__(self): + self.been_called = False + super().__init__() + + def __call__(self, x): + self.been_called = True + return super().__call__(x) + + +def myTakeStep2(x): + """redo RandomDisplacement in function form without the attribute stepsize + to make sure everything still works ok + """ + s = 0.5 + x += np.random.uniform(-s, s, np.shape(x)) + return x + + +class MyAcceptTest: + """pass a custom accept test + + This does nothing but make sure it's being used and ensure all the + possible return values are accepted + """ + def __init__(self): + self.been_called = False + self.ncalls = 0 + self.testres = [False, 'force accept', True, np.bool_(True), + np.bool_(False), [], {}, 0, 1] + + def __call__(self, **kwargs): + self.been_called = True + self.ncalls += 1 + if self.ncalls - 1 < len(self.testres): + return self.testres[self.ncalls - 1] + else: + return True + + +class MyCallBack: + """pass a custom callback function + + This makes sure it's being used. It also returns True after 10 + steps to ensure that it's stopping early. + + """ + def __init__(self): + self.been_called = False + self.ncalls = 0 + + def __call__(self, x, f, accepted): + self.been_called = True + self.ncalls += 1 + if self.ncalls == 10: + return True + + +class TestBasinHopping: + + def setup_method(self): + """ Tests setup. + + Run tests based on the 1-D and 2-D functions described above.
+ """ + self.x0 = (1.0, [1.0, 1.0]) + self.sol = (-0.195, np.array([-0.195, -0.1])) + + self.tol = 3 # number of decimal places + + self.niter = 100 + self.disp = False + + # fix random seed + np.random.seed(1234) + + self.kwargs = {"method": "L-BFGS-B", "jac": True} + self.kwargs_nograd = {"method": "L-BFGS-B"} + + def test_TypeError(self): + # test the TypeErrors are raised on bad input + i = 1 + # if take_step is passed, it must be callable + assert_raises(TypeError, basinhopping, func2d, self.x0[i], + take_step=1) + # if accept_test is passed, it must be callable + assert_raises(TypeError, basinhopping, func2d, self.x0[i], + accept_test=1) + + def test_input_validation(self): + msg = 'target_accept_rate has to be in range \\(0, 1\\)' + with assert_raises(ValueError, match=msg): + basinhopping(func1d, self.x0[0], target_accept_rate=0.) + with assert_raises(ValueError, match=msg): + basinhopping(func1d, self.x0[0], target_accept_rate=1.) + + msg = 'stepwise_factor has to be in range \\(0, 1\\)' + with assert_raises(ValueError, match=msg): + basinhopping(func1d, self.x0[0], stepwise_factor=0.) + with assert_raises(ValueError, match=msg): + basinhopping(func1d, self.x0[0], stepwise_factor=1.) + + def test_1d_grad(self): + # test 1-D minimizations with gradient + i = 0 + res = basinhopping(func1d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=self.niter, disp=self.disp) + assert_almost_equal(res.x, self.sol[i], self.tol) + + def test_2d(self): + # test 2d minimizations with gradient + i = 1 + res = basinhopping(func2d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=self.niter, disp=self.disp) + assert_almost_equal(res.x, self.sol[i], self.tol) + assert_(res.nfev > 0) + + def test_njev(self): + # test njev is returned correctly + i = 1 + minimizer_kwargs = self.kwargs.copy() + # L-BFGS-B doesn't use njev, but BFGS does + minimizer_kwargs["method"] = "BFGS" + res = basinhopping(func2d, self.x0[i], + minimizer_kwargs=minimizer_kwargs, niter=self.niter, + disp=self.disp) + assert_(res.nfev > 0) + assert_equal(res.nfev, res.njev) + + def test_jac(self): + # test Jacobian returned + minimizer_kwargs = self.kwargs.copy() + # BFGS returns a Jacobian + minimizer_kwargs["method"] = "BFGS" + + res = basinhopping(func2d_easyderiv, [0.0, 0.0], + minimizer_kwargs=minimizer_kwargs, niter=self.niter, + disp=self.disp) + + assert_(hasattr(res.lowest_optimization_result, "jac")) + + # in this case, the Jacobian is just [df/dx, df/dy] + _, jacobian = func2d_easyderiv(res.x) + assert_almost_equal(res.lowest_optimization_result.jac, jacobian, + self.tol) + + def test_2d_nograd(self): + # test 2-D minimizations without gradient + i = 1 + res = basinhopping(func2d_nograd, self.x0[i], + minimizer_kwargs=self.kwargs_nograd, + niter=self.niter, disp=self.disp) + assert_almost_equal(res.x, self.sol[i], self.tol) + + @pytest.mark.fail_slow(5) + def test_all_minimizers(self): + # Test 2-D minimizations with gradient. Nelder-Mead, Powell, COBYLA, and + # COBYQA don't accept jac=True, so aren't included here. + i = 1 + methods = ['CG', 'BFGS', 'Newton-CG', 'L-BFGS-B', 'TNC', 'SLSQP'] + minimizer_kwargs = copy.copy(self.kwargs) + for method in methods: + minimizer_kwargs["method"] = method + res = basinhopping(func2d, self.x0[i], + minimizer_kwargs=minimizer_kwargs, + niter=self.niter, disp=self.disp) + assert_almost_equal(res.x, self.sol[i], self.tol) + + @pytest.mark.fail_slow(10) + def test_all_nograd_minimizers(self): + # Test 2-D minimizations without gradient. 
Newton-CG requires jac=True, + # so not included here. + i = 1 + methods = ['CG', 'BFGS', 'L-BFGS-B', 'TNC', 'SLSQP', + 'Nelder-Mead', 'Powell', 'COBYLA', 'COBYQA'] + minimizer_kwargs = copy.copy(self.kwargs_nograd) + for method in methods: + # COBYQA takes an extensive amount of time on this problem + niter = 10 if method == 'COBYQA' else self.niter + minimizer_kwargs["method"] = method + res = basinhopping(func2d_nograd, self.x0[i], + minimizer_kwargs=minimizer_kwargs, + niter=niter, disp=self.disp) + tol = self.tol + if method == 'COBYLA': + tol = 2 + assert_almost_equal(res.x, self.sol[i], decimal=tol) + + def test_pass_takestep(self): + # test that passing a custom takestep works + # also test that the stepsize is being adjusted + takestep = MyTakeStep1() + initial_step_size = takestep.stepsize + i = 1 + res = basinhopping(func2d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=self.niter, disp=self.disp, + take_step=takestep) + assert_almost_equal(res.x, self.sol[i], self.tol) + assert_(takestep.been_called) + # make sure that the built-in adaptive step size has been used + assert_(initial_step_size != takestep.stepsize) + + def test_pass_simple_takestep(self): + # test that passing a custom takestep without the attribute stepsize works + takestep = myTakeStep2 + i = 1 + res = basinhopping(func2d_nograd, self.x0[i], + minimizer_kwargs=self.kwargs_nograd, + niter=self.niter, disp=self.disp, + take_step=takestep) + assert_almost_equal(res.x, self.sol[i], self.tol) + + def test_pass_accept_test(self): + # test passing a custom accept test + # makes sure it's being used and ensures all the possible return values + # are accepted. + accept_test = MyAcceptTest() + i = 1 + # there's no point in running it more than a few steps. + basinhopping(func2d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=10, disp=self.disp, accept_test=accept_test) + assert_(accept_test.been_called) + + def test_pass_callback(self): + # test passing a custom callback function + # This makes sure it's being used. It also returns True after 10 steps + # to ensure that it's stopping early. + callback = MyCallBack() + i = 1 + # there's no point in running it more than a few steps. + res = basinhopping(func2d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=30, disp=self.disp, callback=callback) + assert_(callback.been_called) + assert_("callback" in res.message[0]) + # One of the calls of MyCallBack is during BasinHoppingRunner + # construction, so there are only 9 remaining before MyCallBack stops + # the minimization.
+ assert_equal(res.nit, 9) + + def test_minimizer_fail(self): + # test if a minimizer fails + i = 1 + self.kwargs["options"] = dict(maxiter=0) + self.niter = 10 + res = basinhopping(func2d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=self.niter, disp=self.disp) + # the number of failed minimizations should be the number of + # iterations + 1 + assert_equal(res.nit + 1, res.minimization_failures) + + def test_niter_zero(self): + # gh5915, what happens if you call basinhopping with niter=0 + i = 0 + basinhopping(func1d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=0, disp=self.disp) + + def test_seed_reproducibility(self): + # seed should ensure reproducibility between runs + minimizer_kwargs = {"method": "L-BFGS-B", "jac": True} + + f_1 = [] + + def callback(x, f, accepted): + f_1.append(f) + + basinhopping(func2d, [1.0, 1.0], minimizer_kwargs=minimizer_kwargs, + niter=10, callback=callback, seed=10) + + f_2 = [] + + def callback2(x, f, accepted): + f_2.append(f) + + basinhopping(func2d, [1.0, 1.0], minimizer_kwargs=minimizer_kwargs, + niter=10, callback=callback2, seed=10) + assert_equal(np.array(f_1), np.array(f_2)) + + def test_random_gen(self): + # check that np.random.Generator can be used (numpy >= 1.17) + rng = np.random.default_rng(1) + + minimizer_kwargs = {"method": "L-BFGS-B", "jac": True} + + res1 = basinhopping(func2d, [1.0, 1.0], + minimizer_kwargs=minimizer_kwargs, + niter=10, seed=rng) + + rng = np.random.default_rng(1) + res2 = basinhopping(func2d, [1.0, 1.0], + minimizer_kwargs=minimizer_kwargs, + niter=10, seed=rng) + assert_equal(res1.x, res2.x) + + def test_monotonic_basin_hopping(self): + # test 1-D minimizations with gradient and T=0 + i = 0 + res = basinhopping(func1d, self.x0[i], minimizer_kwargs=self.kwargs, + niter=self.niter, disp=self.disp, T=0) + assert_almost_equal(res.x, self.sol[i], self.tol) + + +class Test_Storage: + def setup_method(self): + self.x0 = np.array(1) + self.f0 = 0 + + minres = OptimizeResult(success=True) + minres.x = self.x0 + minres.fun = self.f0 + + self.storage = Storage(minres) + + def test_higher_f_rejected(self): + new_minres = OptimizeResult(success=True) + new_minres.x = self.x0 + 1 + new_minres.fun = self.f0 + 1 + + ret = self.storage.update(new_minres) + minres = self.storage.get_lowest() + assert_equal(self.x0, minres.x) + assert_equal(self.f0, minres.fun) + assert_(not ret) + + @pytest.mark.parametrize('success', [True, False]) + def test_lower_f_accepted(self, success): + new_minres = OptimizeResult(success=success) + new_minres.x = self.x0 + 1 + new_minres.fun = self.f0 - 1 + + ret = self.storage.update(new_minres) + minres = self.storage.get_lowest() + assert (self.x0 != minres.x) == success # can't use `is` + assert (self.f0 != minres.fun) == success # left side is NumPy bool + assert ret is success + + +class Test_RandomDisplacement: + def setup_method(self): + self.stepsize = 1.0 + self.displace = RandomDisplacement(stepsize=self.stepsize) + self.N = 300000 + self.x0 = np.zeros([self.N]) + + def test_random(self): + # the mean should be 0 + # the variance should be (2*stepsize)**2 / 12 + # note these tests are random, they will fail from time to time + x = self.displace(self.x0) + v = (2. * self.stepsize) ** 2 / 12 + assert_almost_equal(np.mean(x), 0., 1) + assert_almost_equal(np.var(x), v, 1) + + +class Test_Metropolis: + def setup_method(self): + self.T = 2. + self.met = Metropolis(self.T) + self.res_new = OptimizeResult(success=True, fun=0.) + self.res_old = OptimizeResult(success=True, fun=1.) 
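The `Test_Metropolis` cases that follow exercise the standard Metropolis acceptance rule. As a reference point, a hypothetical standalone sketch of that rule (not the class under test):

```python
import numpy as np

def metropolis_accept(f_new, f_old, T, rng=None):
    # Standard Metropolis rule: always accept a lower energy; accept a
    # higher one with probability exp(-(f_new - f_old) / T).
    # (The class under test additionally gates on local-search success;
    # see test_accept_gh7799 below.)
    rng = np.random.default_rng() if rng is None else rng
    if T == 0:  # monotonic basin hopping: never go uphill
        return bool(f_new <= f_old)
    w = np.exp(min(0.0, -(f_new - f_old) / T))
    return bool(w >= rng.uniform())
```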
+ + def test_boolean_return(self): + # the return must be a bool, else an error will be raised in + # basinhopping + ret = self.met(res_new=self.res_new, res_old=self.res_old) + assert isinstance(ret, bool) + + def test_lower_f_accepted(self): + assert_(self.met(res_new=self.res_new, res_old=self.res_old)) + + def test_accept(self): + # test that steps are randomly accepted for f_new > f_old + one_accept = False + one_reject = False + for i in range(1000): + if one_accept and one_reject: + break + res_new = OptimizeResult(success=True, fun=1.) + res_old = OptimizeResult(success=True, fun=0.5) + ret = self.met(res_new=res_new, res_old=res_old) + if ret: + one_accept = True + else: + one_reject = True + assert_(one_accept) + assert_(one_reject) + + def test_GH7495(self): + # an overflow in exp was producing a RuntimeWarning + # create own object here in case someone changes self.T + met = Metropolis(2) + res_new = OptimizeResult(success=True, fun=0.) + res_old = OptimizeResult(success=True, fun=2000) + with np.errstate(over='raise'): + met.accept_reject(res_new=res_new, res_old=res_old) + + def test_gh7799(self): + # gh-7799 reported a problem in which local search was successful but + # basinhopping returned an invalid solution. Show that this is fixed. + def func(x): + return (x**2-8)**2+(x+2)**2 + + x0 = -4 + limit = 50 # Constrain to func value >= 50 + con = {'type': 'ineq', 'fun': lambda x: func(x) - limit}, + res = basinhopping(func, x0, 30, minimizer_kwargs={'constraints': con}) + assert res.success + assert_allclose(res.fun, limit, rtol=1e-6) + + def test_accept_gh7799(self): + # Metropolis should not accept the result of an unsuccessful new local + # search if the old local search was successful + + met = Metropolis(0) # monotonic basin hopping + res_new = OptimizeResult(success=True, fun=0.) + res_old = OptimizeResult(success=True, fun=1.) + + # if new local search was successful and energy is lower, accept + assert met(res_new=res_new, res_old=res_old) + # if new res is unsuccessful, don't accept - even if energy is lower + res_new.success = False + assert not met(res_new=res_new, res_old=res_old) + # ...unless the old res was unsuccessful, too. In that case, why not? + res_old.success = False + assert met(res_new=res_new, res_old=res_old) + + def test_reject_all_gh7799(self): + # Test the behavior when there is no feasible solution + def fun(x): + return x@x + + def constraint(x): + return x + 1 + + kwargs = {'constraints': {'type': 'eq', 'fun': constraint}, + 'bounds': [(0, 1), (0, 1)], 'method': 'slsqp'} + res = basinhopping(fun, x0=[2, 3], niter=10, minimizer_kwargs=kwargs) + assert not res.success + + +class Test_AdaptiveStepsize: + def setup_method(self): + self.stepsize = 1. + self.ts = RandomDisplacement(stepsize=self.stepsize) + self.target_accept_rate = 0.5 + self.takestep = AdaptiveStepsize(takestep=self.ts, verbose=False, + accept_rate=self.target_accept_rate) + + def test_adaptive_increase(self): + # if few steps are rejected, the stepsize should increase + x = 0. + self.takestep(x) + self.takestep.report(False) + for i in range(self.takestep.interval): + self.takestep(x) + self.takestep.report(True) + assert_(self.ts.stepsize > self.stepsize) + + def test_adaptive_decrease(self): + # if few steps are accepted, the stepsize should decrease + x = 0.
+ self.takestep(x) + self.takestep.report(True) + for i in range(self.takestep.interval): + self.takestep(x) + self.takestep.report(False) + assert_(self.ts.stepsize < self.stepsize) + + def test_all_accepted(self): + # test that everything works OK if all steps were accepted + x = 0. + for i in range(self.takestep.interval + 1): + self.takestep(x) + self.takestep.report(True) + assert_(self.ts.stepsize > self.stepsize) + + def test_all_rejected(self): + # test that everything works OK if all steps were rejected + x = 0. + for i in range(self.takestep.interval + 1): + self.takestep(x) + self.takestep.report(False) + assert_(self.ts.stepsize < self.stepsize) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__differential_evolution.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__differential_evolution.py new file mode 100644 index 0000000000000000000000000000000000000000..536f928a8480fefd918c64f523baf4800b352e3c --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__differential_evolution.py @@ -0,0 +1,1699 @@ +""" +Unit tests for the differential global minimization algorithm. +""" +import multiprocessing +from multiprocessing.dummy import Pool as ThreadPool +import platform + +from scipy.optimize._differentialevolution import (DifferentialEvolutionSolver, + _ConstraintWrapper) +from scipy.optimize import differential_evolution, OptimizeResult +from scipy.optimize._constraints import (Bounds, NonlinearConstraint, + LinearConstraint) +from scipy.optimize import rosen, minimize +from scipy.sparse import csr_matrix +from scipy import stats + +import numpy as np +from numpy.testing import (assert_equal, assert_allclose, assert_almost_equal, + assert_string_equal, assert_, suppress_warnings) +from pytest import raises as assert_raises, warns +import pytest + + +class TestDifferentialEvolutionSolver: + + def setup_method(self): + self.old_seterr = np.seterr(invalid='raise') + self.limits = np.array([[0., 0.], + [2., 2.]]) + self.bounds = [(0., 2.), (0., 2.)] + + self.dummy_solver = DifferentialEvolutionSolver(self.quadratic, + [(0, 100)]) + + # dummy_solver2 will be used to test mutation strategies + self.dummy_solver2 = DifferentialEvolutionSolver(self.quadratic, + [(0, 1)], + popsize=7, + mutation=0.5) + # create a population that's only 7 members long + # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7] + population = np.atleast_2d(np.arange(0.1, 0.8, 0.1)).T + self.dummy_solver2.population = population + + def teardown_method(self): + np.seterr(**self.old_seterr) + + def quadratic(self, x): + return x[0]**2 + + def test__strategy_resolves(self): + # test that the correct mutation function is resolved by + # different requested strategy arguments + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='best1exp') + assert_equal(solver.strategy, 'best1exp') + assert_equal(solver.mutation_func.__name__, '_best1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='best1bin') + assert_equal(solver.strategy, 'best1bin') + assert_equal(solver.mutation_func.__name__, '_best1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='rand1bin') + assert_equal(solver.strategy, 'rand1bin') + assert_equal(solver.mutation_func.__name__, '_rand1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='rand1exp') + assert_equal(solver.strategy, 
'rand1exp') + assert_equal(solver.mutation_func.__name__, '_rand1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='rand2exp') + assert_equal(solver.strategy, 'rand2exp') + assert_equal(solver.mutation_func.__name__, '_rand2') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='best2bin') + assert_equal(solver.strategy, 'best2bin') + assert_equal(solver.mutation_func.__name__, '_best2') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='rand2bin') + assert_equal(solver.strategy, 'rand2bin') + assert_equal(solver.mutation_func.__name__, '_rand2') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='rand2exp') + assert_equal(solver.strategy, 'rand2exp') + assert_equal(solver.mutation_func.__name__, '_rand2') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='randtobest1bin') + assert_equal(solver.strategy, 'randtobest1bin') + assert_equal(solver.mutation_func.__name__, '_randtobest1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='randtobest1exp') + assert_equal(solver.strategy, 'randtobest1exp') + assert_equal(solver.mutation_func.__name__, '_randtobest1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='currenttobest1bin') + assert_equal(solver.strategy, 'currenttobest1bin') + assert_equal(solver.mutation_func.__name__, '_currenttobest1') + + solver = DifferentialEvolutionSolver(rosen, + self.bounds, + strategy='currenttobest1exp') + assert_equal(solver.strategy, 'currenttobest1exp') + assert_equal(solver.mutation_func.__name__, '_currenttobest1') + + def test__mutate1(self): + # strategies */1/*, i.e. rand/1/bin, best/1/exp, etc. + result = np.array([0.05]) + trial = self.dummy_solver2._best1(np.array([2, 3, 4, 5, 6])) + assert_allclose(trial, result) + + result = np.array([0.25]) + trial = self.dummy_solver2._rand1(np.array([2, 3, 4, 5, 6])) + assert_allclose(trial, result) + + def test__mutate2(self): + # strategies */2/*, i.e. rand/2/bin, best/2/exp, etc. 
+ # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7] + + result = np.array([-0.1]) + trial = self.dummy_solver2._best2(np.array([2, 3, 4, 5, 6])) + assert_allclose(trial, result) + + result = np.array([0.1]) + trial = self.dummy_solver2._rand2(np.array([2, 3, 4, 5, 6])) + assert_allclose(trial, result) + + def test__randtobest1(self): + # strategies randtobest/1/* + result = np.array([0.15]) + trial = self.dummy_solver2._randtobest1(np.array([2, 3, 4, 5, 6])) + assert_allclose(trial, result) + + def test__currenttobest1(self): + # strategies currenttobest/1/* + result = np.array([0.1]) + trial = self.dummy_solver2._currenttobest1( + 1, + np.array([2, 3, 4, 5, 6]) + ) + assert_allclose(trial, result) + + def test_can_init_with_dithering(self): + mutation = (0.5, 1) + solver = DifferentialEvolutionSolver(self.quadratic, + self.bounds, + mutation=mutation) + + assert_equal(solver.dither, list(mutation)) + + def test_invalid_mutation_values_arent_accepted(self): + func = rosen + mutation = (0.5, 3) + assert_raises(ValueError, + DifferentialEvolutionSolver, + func, + self.bounds, + mutation=mutation) + + mutation = (-1, 1) + assert_raises(ValueError, + DifferentialEvolutionSolver, + func, + self.bounds, + mutation=mutation) + + mutation = (0.1, np.nan) + assert_raises(ValueError, + DifferentialEvolutionSolver, + func, + self.bounds, + mutation=mutation) + + mutation = 0.5 + solver = DifferentialEvolutionSolver(func, + self.bounds, + mutation=mutation) + assert_equal(0.5, solver.scale) + assert_equal(None, solver.dither) + + def test_invalid_functional(self): + def func(x): + return np.array([np.sum(x ** 2), np.sum(x)]) + + with assert_raises( + RuntimeError, + match=r"func\(x, \*args\) must return a scalar value"): + differential_evolution(func, [(-2, 2), (-2, 2)]) + + def test__scale_parameters(self): + trial = np.array([0.3]) + assert_equal(30, self.dummy_solver._scale_parameters(trial)) + + # it should also work with the limits reversed + self.dummy_solver.limits = np.array([[100], [0.]]) + assert_equal(30, self.dummy_solver._scale_parameters(trial)) + + def test__unscale_parameters(self): + trial = np.array([30]) + assert_equal(0.3, self.dummy_solver._unscale_parameters(trial)) + + # it should also work with the limits reversed + self.dummy_solver.limits = np.array([[100], [0.]]) + assert_equal(0.3, self.dummy_solver._unscale_parameters(trial)) + + def test_equal_bounds(self): + with np.errstate(invalid='raise'): + solver = DifferentialEvolutionSolver( + self.quadratic, + bounds=[(2.0, 2.0), (1.0, 3.0)] + ) + v = solver._unscale_parameters([2.0, 2.0]) + assert_allclose(v, 0.5) + + res = differential_evolution(self.quadratic, [(2.0, 2.0), (3.0, 3.0)]) + assert_equal(res.x, [2.0, 3.0]) + + def test__ensure_constraint(self): + trial = np.array([1.1, -100, 0.9, 2., 300., -0.00001]) + self.dummy_solver._ensure_constraint(trial) + + assert_equal(trial[2], 0.9) + assert_(np.logical_and(trial >= 0, trial <= 1).all()) + + def test_differential_evolution(self): + # test that the Jmin of DifferentialEvolutionSolver + # is the same as the function evaluation + solver = DifferentialEvolutionSolver( + self.quadratic, [(-2, 2)], maxiter=1, polish=False + ) + result = solver.solve() + assert_equal(result.fun, self.quadratic(result.x)) + + solver = DifferentialEvolutionSolver( + self.quadratic, [(-2, 2)], maxiter=1, polish=True + ) + result = solver.solve() + assert_equal(result.fun, self.quadratic(result.x)) + + def test_best_solution_retrieval(self): + # test that the getter property method for the best solution 
works. + solver = DifferentialEvolutionSolver(self.quadratic, [(-2, 2)]) + result = solver.solve() + assert_equal(result.x, solver.x) + + def test_intermediate_result(self): + # Check that intermediate result object passed into the callback + # function contains the expected information and that raising + # `StopIteration` causes the expected behavior. + maxiter = 10 + + def func(x): + val = rosen(x) + if val < func.val: + func.x = x + func.val = val + return val + func.x = None + func.val = np.inf + + def callback(intermediate_result): + callback.nit += 1 + callback.intermediate_result = intermediate_result + assert intermediate_result.population.ndim == 2 + assert intermediate_result.population.shape[1] == 2 + assert intermediate_result.nit == callback.nit + + # Check that `x` and `fun` attributes are the best found so far + assert_equal(intermediate_result.x, callback.func.x) + assert_equal(intermediate_result.fun, callback.func.val) + + # Check for consistency between `fun`, `population_energies`, + # `x`, and `population` + assert_equal(intermediate_result.fun, rosen(intermediate_result.x)) + for i in range(len(intermediate_result.population_energies)): + res = intermediate_result.population_energies[i] + ref = rosen(intermediate_result.population[i]) + assert_equal(res, ref) + assert_equal(intermediate_result.x, + intermediate_result.population[0]) + assert_equal(intermediate_result.fun, + intermediate_result.population_energies[0]) + + assert intermediate_result.message == 'in progress' + assert intermediate_result.success is True + assert isinstance(intermediate_result, OptimizeResult) + if callback.nit == maxiter: + raise StopIteration + callback.nit = 0 + callback.intermediate_result = None + callback.func = func + + bounds = [(0, 2), (0, 2)] + kwargs = dict(func=func, bounds=bounds, seed=838245, polish=False) + res = differential_evolution(**kwargs, callback=callback) + ref = differential_evolution(**kwargs, maxiter=maxiter) + + # Check that final `intermediate_result` is equivalent to returned + # result object and that terminating with callback `StopIteration` + # after `maxiter` iterations is equivalent to terminating with + # `maxiter` parameter. + assert res.success is ref.success is False + assert callback.nit == res.nit == maxiter + assert res.message == 'callback function requested stop early' + assert ref.message == 'Maximum number of iterations has been exceeded.' 
+ for field, val in ref.items(): + if field in {'message', 'success'}: # checked separately + continue + assert_equal(callback.intermediate_result[field], val) + assert_equal(res[field], val) + + # Check that polish occurs after `StopIteration` as advertised + callback.nit = 0 + func.val = np.inf + kwargs['polish'] = True + res = differential_evolution(**kwargs, callback=callback) + assert res.fun < ref.fun + + def test_callback_terminates(self): + # test that if the callback returns true, then the minimization halts + bounds = [(0, 2), (0, 2)] + expected_msg = 'callback function requested stop early' + def callback_python_true(param, convergence=0.): + return True + + result = differential_evolution( + rosen, bounds, callback=callback_python_true + ) + assert_string_equal(result.message, expected_msg) + + # if callback raises StopIteration then solve should be interrupted + def callback_stop(intermediate_result): + raise StopIteration + + result = differential_evolution(rosen, bounds, callback=callback_stop) + assert not result.success + + def callback_evaluates_true(param, convergence=0.): + # DE should stop if bool(self.callback) is True + return [10] + + result = differential_evolution(rosen, bounds, callback=callback_evaluates_true) + assert_string_equal(result.message, expected_msg) + assert not result.success + + def callback_evaluates_false(param, convergence=0.): + return [] + + result = differential_evolution(rosen, bounds, + callback=callback_evaluates_false) + assert result.success + + def test_args_tuple_is_passed(self): + # test that the args tuple is passed to the cost function properly. + bounds = [(-10, 10)] + args = (1., 2., 3.) + + def quadratic(x, *args): + if type(args) != tuple: + raise ValueError('args should be a tuple') + return args[0] + args[1] * x + args[2] * x**2. + + result = differential_evolution(quadratic, + bounds, + args=args, + polish=True) + assert_almost_equal(result.fun, 2 / 3.) + + def test_init_with_invalid_strategy(self): + # test that passing an invalid strategy raises ValueError + func = rosen + bounds = [(-3, 3)] + assert_raises(ValueError, + differential_evolution, + func, + bounds, + strategy='abc') + + def test_bounds_checking(self): + # test that the bounds checking works + func = rosen + bounds = [(-3)] + assert_raises(ValueError, + differential_evolution, + func, + bounds) + bounds = [(-3, 3), (3, 4, 5)] + assert_raises(ValueError, + differential_evolution, + func, + bounds) + + # test that we can use a new-type Bounds object + result = differential_evolution(rosen, Bounds([0, 0], [2, 2])) + assert_almost_equal(result.x, (1., 1.)) + + def test_select_samples(self): + # select_samples should return 5 separate random numbers. + limits = np.arange(12., dtype='float64').reshape(2, 6) + bounds = list(zip(limits[0, :], limits[1, :])) + solver = DifferentialEvolutionSolver(None, bounds, popsize=1) + candidate = 0 + r1, r2, r3, r4, r5 = solver._select_samples(candidate, 5) + assert_equal( + len(np.unique(np.array([candidate, r1, r2, r3, r4, r5]))), 6) + + def test_maxiter_stops_solve(self): + # test that if the maximum number of iterations is exceeded + # the solver stops. 
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, maxiter=1)
+        result = solver.solve()
+        assert_equal(result.success, False)
+        assert_equal(result.message,
+                     'Maximum number of iterations has been exceeded.')
+
+    def test_maxfun_stops_solve(self):
+        # test that if the maximum number of function evaluations is exceeded
+        # during initialisation the solver stops
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, maxfun=1,
+                                             polish=False)
+        result = solver.solve()
+
+        assert_equal(result.nfev, 2)
+        assert_equal(result.success, False)
+        assert_equal(result.message,
+                     'Maximum number of function evaluations has '
+                     'been exceeded.')
+
+        # test that if the maximum number of function evaluations is exceeded
+        # during the actual minimisation, then the solver stops.
+        # Have to turn polishing off, as this will still occur even if maxfun
+        # is reached. For popsize=5 and len(bounds)=2 there are only 10
+        # function evaluations during initialisation.
+        solver = DifferentialEvolutionSolver(rosen,
+                                             self.bounds,
+                                             popsize=5,
+                                             polish=False,
+                                             maxfun=40)
+        result = solver.solve()
+
+        assert_equal(result.nfev, 41)
+        assert_equal(result.success, False)
+        assert_equal(result.message,
+                     'Maximum number of function evaluations has '
+                     'been exceeded.')
+
+        # now repeat for the updating='deferred' version.
+        # 47 function evaluations is not a multiple of the population size,
+        # so maxfun is reached partway through a population evaluation.
+        solver = DifferentialEvolutionSolver(rosen,
+                                             self.bounds,
+                                             popsize=5,
+                                             polish=False,
+                                             maxfun=47,
+                                             updating='deferred')
+        result = solver.solve()
+
+        assert_equal(result.nfev, 47)
+        assert_equal(result.success, False)
+        assert_equal(result.message,
+                     'Maximum number of function evaluations has '
+                     'been exceeded.')
+
+    def test_quadratic(self):
+        # test the quadratic function supplied as a bound method
+        solver = DifferentialEvolutionSolver(self.quadratic,
+                                             [(-100, 100)],
+                                             tol=0.02)
+        solver.solve()
+        assert_equal(np.argmin(solver.population_energies), 0)
+
+    def test_quadratic_from_diff_ev(self):
+        # test the quadratic function via the differential_evolution function
+        differential_evolution(self.quadratic,
+                               [(-100, 100)],
+                               tol=0.02)
+
+    def test_seed_gives_repeatability(self):
+        result = differential_evolution(self.quadratic,
+                                        [(-100, 100)],
+                                        polish=False,
+                                        seed=1,
+                                        tol=0.5)
+        result2 = differential_evolution(self.quadratic,
+                                         [(-100, 100)],
+                                         polish=False,
+                                         seed=1,
+                                         tol=0.5)
+        assert_equal(result.x, result2.x)
+        assert_equal(result.nfev, result2.nfev)
+
+    def test_random_generator(self):
+        # check that np.random.Generator can be used (numpy >= 1.17)
+        # obtain a np.random.Generator object
+        rng = np.random.default_rng()
+
+        inits = ['random', 'latinhypercube', 'sobol', 'halton']
+        for init in inits:
+            differential_evolution(self.quadratic,
+                                   [(-100, 100)],
+                                   polish=False,
+                                   seed=rng,
+                                   tol=0.5,
+                                   init=init)
+
+    def test_exp_runs(self):
+        # test whether exponential mutation loop runs
+        solver = DifferentialEvolutionSolver(rosen,
+                                             self.bounds,
+                                             strategy='best1exp',
+                                             maxiter=1)
+
+        solver.solve()
+
+    def test_gh_4511_regression(self):
+        # This modification of the differential evolution docstring example
+        # uses a custom popsize that had triggered an off-by-one error.
+        # Because we do not care about solving the optimization problem in
+        # this test, we use maxiter=1 to reduce the testing time.
+        bounds = [(-5, 5), (-5, 5)]
+        # result = differential_evolution(rosen, bounds, popsize=1815,
+        #                                 maxiter=1)
+
+        # the original issue arose because of rounding error in arange, with
+        # linspace being a much better solution. 1815 is quite a large popsize
+        # to use and results in a long test time (~13s). I used the original
+        # issue to figure out the lowest number of samples that would cause
+        # this rounding error to occur, 49.
+        differential_evolution(rosen, bounds, popsize=49, maxiter=1)
+
+    def test_calculate_population_energies(self):
+        # if popsize is 3, then the overall generation has size (6,)
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, popsize=3)
+        solver._calculate_population_energies(solver.population)
+        solver._promote_lowest_energy()
+        assert_equal(np.argmin(solver.population_energies), 0)
+
+        # initial calculation of the energies should require 6 nfev.
+        assert_equal(solver._nfev, 6)
+
+    def test_iteration(self):
+        # test that DifferentialEvolutionSolver is iterable
+        # if popsize is 3, then the overall generation has size (6,)
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, popsize=3,
+                                             maxfun=12)
+        x, fun = next(solver)
+        assert_equal(np.size(x, 0), 2)
+
+        # 6 nfev are required for initial calculation of energies, 6 nfev are
+        # required for the evolution of the 6 population members.
+        assert_equal(solver._nfev, 12)
+
+        # the next generation should halt because it exceeds maxfun
+        assert_raises(StopIteration, next, solver)
+
+        # check a proper minimisation can be done by an iterable solver
+        solver = DifferentialEvolutionSolver(rosen, self.bounds)
+        _, fun_prev = next(solver)
+        for i, soln in enumerate(solver):
+            x_current, fun_current = soln
+            assert fun_prev >= fun_current
+            _, fun_prev = x_current, fun_current
+            # need to have this otherwise the solver would never stop.
+            if i == 50:
+                break
+
+    def test_convergence(self):
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, tol=0.2,
+                                             polish=False)
+        solver.solve()
+        assert_(solver.convergence < 0.2)
+
+    def test_maxiter_none_GH5731(self):
+        # Pre 0.17 the default for maxiter and maxfun was None; the
+        # numerical defaults are now 1000 and np.inf. However, some scripts
+        # will still supply None for both of those, which used to raise a
+        # TypeError in the solve method. Check that None is now handled.
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, maxiter=None,
+                                             maxfun=None)
+        solver.solve()
+
+    def test_population_initiation(self):
+        # test the different modes of population initiation
+
+        # init must be a recognised keyword ('latinhypercube', 'sobol',
+        # 'halton', 'random') or a population array, raising ValueError
+        # if something else is passed in
+        assert_raises(ValueError,
+                      DifferentialEvolutionSolver,
+                      *(rosen, self.bounds),
+                      **{'init': 'rubbish'})
+
+        solver = DifferentialEvolutionSolver(rosen, self.bounds)
+
+        # check that population initiation:
+        # 1) resets _nfev to 0
+        # 2) all population energies are np.inf
+        solver.init_population_random()
+        assert_equal(solver._nfev, 0)
+        assert_(np.all(np.isinf(solver.population_energies)))
+
+        solver.init_population_lhs()
+        assert_equal(solver._nfev, 0)
+        assert_(np.all(np.isinf(solver.population_energies)))
+
+        solver.init_population_qmc(qmc_engine='halton')
+        assert_equal(solver._nfev, 0)
+        assert_(np.all(np.isinf(solver.population_energies)))
+
+        solver = DifferentialEvolutionSolver(rosen, self.bounds, init='sobol')
+        solver.init_population_qmc(qmc_engine='sobol')
+        assert_equal(solver._nfev, 0)
+        assert_(np.all(np.isinf(solver.population_energies)))
+
+        # we should be able to initialize with our own array
+        population = np.linspace(-1, 3, 10).reshape(5, 2)
+        solver = DifferentialEvolutionSolver(rosen, self.bounds,
+                                             init=population,
+                                             strategy='best2bin',
+                                             atol=0.01, seed=1, popsize=5)
+
+        assert_equal(solver._nfev, 0)
+        assert_(np.all(np.isinf(solver.population_energies)))
+        assert_(solver.num_population_members == 5)
+        assert_(solver.population_shape == (5, 2))
+
+        # check that the population was initialized correctly
+        unscaled_population = np.clip(solver._unscale_parameters(population),
+                                      0, 1)
+        assert_almost_equal(solver.population[:5], unscaled_population)
+
+        # population values need to be clipped to bounds
+        assert_almost_equal(np.min(solver.population[:5]), 0)
+        assert_almost_equal(np.max(solver.population[:5]), 1)
+
+        # shouldn't be able to initialize with an array if it's the wrong shape
+        # this would have too many parameters
+        population = np.linspace(-1, 3, 15).reshape(5, 3)
+        assert_raises(ValueError,
+                      DifferentialEvolutionSolver,
+                      *(rosen, self.bounds),
+                      **{'init': population})
+
+        # provide an initial solution
+        # bounds are [(0, 2), (0, 2)]
+        x0 = np.random.uniform(low=0.0, high=2.0, size=2)
+        solver = DifferentialEvolutionSolver(
+            rosen, self.bounds, x0=x0
+        )
+        # parameters are scaled to unit interval
+        assert_allclose(solver.population[0], x0 / 2.0)
+
+    def test_x0(self):
+        # smoke test that checks that x0 is usable.
+        res = differential_evolution(rosen, self.bounds, x0=[0.2, 0.8])
+        assert res.success
+
+        # check what happens if some of the x0 lie outside the bounds
+        with assert_raises(ValueError):
+            differential_evolution(rosen, self.bounds, x0=[0.2, 2.1])
+
+    def test_infinite_objective_function(self):
+        # Test that there are no problems if the objective function
+        # returns inf on some runs
+        def sometimes_inf(x):
+            if x[0] < .5:
+                return np.inf
+            return x[1]
+        bounds = [(0, 1), (0, 1)]
+        differential_evolution(sometimes_inf, bounds=bounds, disp=False)
+
+    def test_deferred_updating(self):
+        # check setting of deferred updating, with default workers
+        bounds = [(0., 2.), (0., 2.)]
+        solver = DifferentialEvolutionSolver(rosen, bounds, updating='deferred')
+        assert_(solver._updating == 'deferred')
+        assert_(solver._mapwrapper._mapfunc is map)
+        res = solver.solve()
+        assert res.success
+
+        # check that deferred updating works with an exponential crossover
+        res = differential_evolution(
+            rosen, bounds, updating='deferred', strategy='best1exp'
+        )
+        assert res.success
+
+    def test_immediate_updating(self):
+        # check setting of immediate updating, with default workers
+        bounds = [(0., 2.), (0., 2.)]
+        solver = DifferentialEvolutionSolver(rosen, bounds)
+        assert_(solver._updating == 'immediate')
+
+        # Safely forking from a multithreaded process is
+        # problematic, and deprecated in Python 3.12, so
+        # we use a slower but portable alternative
+        # see gh-19848
+        ctx = multiprocessing.get_context("spawn")
+        with ctx.Pool(2) as p:
+            # should raise a UserWarning because the updating='immediate'
+            # is being overridden by the workers keyword
+            with warns(UserWarning):
+                with DifferentialEvolutionSolver(rosen, bounds, workers=p.map) as s:
+                    pass
+            assert s._updating == 'deferred'
+
+    @pytest.mark.fail_slow(5)
+    def test_parallel(self):
+        # smoke test for parallelization with deferred updating
+        bounds = [(0., 2.), (0., 2.)]
+        # use threads instead of Process to speed things up for this simple example
+        with ThreadPool(2) as p, DifferentialEvolutionSolver(
+            rosen, bounds, updating='deferred', workers=p.map, tol=0.1, popsize=3
+        ) as solver:
+            assert solver._mapwrapper.pool is not None
+            assert solver._updating == 'deferred'
+            solver.solve()
+
+        with DifferentialEvolutionSolver(
+            rosen, bounds, updating='deferred', workers=2, popsize=3, tol=0.1
+        ) as solver:
+            assert solver._mapwrapper.pool is not None
+            assert solver._updating == 'deferred'
+            solver.solve()
+
+    def test_converged(self):
+        solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)])
+        solver.solve()
+        assert_(solver.converged())
+
+    def test_constraint_violation_fn(self):
+        def constr_f(x):
+            return [x[0] + x[1]]
+
+        def constr_f2(x):
+            return np.array([x[0]**2 + x[1], x[0] - x[1]])
+
+        nlc = NonlinearConstraint(constr_f, -np.inf, 1.9)
+
+        solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)],
+                                             constraints=(nlc,))
+
+        cv = solver._constraint_violation_fn(np.array([1.0, 1.0]))
+        assert_almost_equal(cv, 0.1)
+
+        nlc2 = NonlinearConstraint(constr_f2, -np.inf, 1.8)
+        solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)],
+                                             constraints=(nlc, nlc2))
+
+        # for multiple constraints the constraint violations should
+        # be concatenated.
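+        # (Sketch of the arithmetic being verified, assuming the usual
+        # interval convention: for NonlinearConstraint(f, lb, ub) each
+        # violation component is max(0, lb - f(x)) or max(0, f(x) - ub).
+        # At x = (1.2, 1): constr_f -> 2.2, excess over 1.9 is 0.3;
+        # constr_f2 -> (2.44, 0.2), excess over 1.8 is (0.64, 0.0);
+        # concatenated this gives (0.3, 0.64, 0.0), the first entry of
+        # `vs` below.)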
+ xs = [(1.2, 1), (2.0, 2.0), (0.5, 0.5)] + vs = [(0.3, 0.64, 0.0), (2.1, 4.2, 0.0), (0, 0, 0)] + + for x, v in zip(xs, vs): + cv = solver._constraint_violation_fn(np.array(x)) + assert_allclose(cv, np.atleast_2d(v)) + + # vectorized calculation of a series of solutions + assert_allclose( + solver._constraint_violation_fn(np.array(xs)), np.array(vs) + ) + + # the following line is used in _calculate_population_feasibilities. + # _constraint_violation_fn returns an (1, M) array when + # x.shape == (N,), i.e. a single solution. Therefore this list + # comprehension should generate (S, 1, M) array. + constraint_violation = np.array([solver._constraint_violation_fn(x) + for x in np.array(xs)]) + assert constraint_violation.shape == (3, 1, 3) + + # we need reasonable error messages if the constraint function doesn't + # return the right thing + def constr_f3(x): + # returns (S, M), rather than (M, S) + return constr_f2(x).T + + nlc2 = NonlinearConstraint(constr_f3, -np.inf, 1.8) + solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)], + constraints=(nlc, nlc2), + vectorized=False) + solver.vectorized = True + with pytest.raises( + RuntimeError, match="An array returned from a Constraint" + ): + solver._constraint_violation_fn(np.array(xs)) + + def test_constraint_population_feasibilities(self): + def constr_f(x): + return [x[0] + x[1]] + + def constr_f2(x): + return [x[0]**2 + x[1], x[0] - x[1]] + + nlc = NonlinearConstraint(constr_f, -np.inf, 1.9) + + solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)], + constraints=(nlc,)) + + # are population feasibilities correct + # [0.5, 0.5] corresponds to scaled values of [1., 1.] + feas, cv = solver._calculate_population_feasibilities( + np.array([[0.5, 0.5], [1., 1.]])) + assert_equal(feas, [False, False]) + assert_almost_equal(cv, np.array([[0.1], [2.1]])) + assert cv.shape == (2, 1) + + nlc2 = NonlinearConstraint(constr_f2, -np.inf, 1.8) + + for vectorize in [False, True]: + solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)], + constraints=(nlc, nlc2), + vectorized=vectorize, + updating='deferred') + + feas, cv = solver._calculate_population_feasibilities( + np.array([[0.5, 0.5], [0.6, 0.5]])) + assert_equal(feas, [False, False]) + assert_almost_equal(cv, np.array([[0.1, 0.2, 0], [0.3, 0.64, 0]])) + + feas, cv = solver._calculate_population_feasibilities( + np.array([[0.5, 0.5], [1., 1.]])) + assert_equal(feas, [False, False]) + assert_almost_equal(cv, np.array([[0.1, 0.2, 0], [2.1, 4.2, 0]])) + assert cv.shape == (2, 3) + + feas, cv = solver._calculate_population_feasibilities( + np.array([[0.25, 0.25], [1., 1.]])) + assert_equal(feas, [True, False]) + assert_almost_equal(cv, np.array([[0.0, 0.0, 0.], [2.1, 4.2, 0]])) + assert cv.shape == (2, 3) + + def test_constraint_solve(self): + def constr_f(x): + return np.array([x[0] + x[1]]) + + nlc = NonlinearConstraint(constr_f, -np.inf, 1.9) + + solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)], + constraints=(nlc,)) + + # trust-constr warns if the constraint function is linear + with warns(UserWarning): + res = solver.solve() + + assert constr_f(res.x) <= 1.9 + assert res.success + + @pytest.mark.fail_slow(5) + def test_impossible_constraint(self): + def constr_f(x): + return np.array([x[0] + x[1]]) + + nlc = NonlinearConstraint(constr_f, -np.inf, -1) + + solver = DifferentialEvolutionSolver( + rosen, [(0, 2), (0, 2)], constraints=(nlc,), popsize=1, seed=1, maxiter=100 + ) + + # a UserWarning is issued because the 'trust-constr' polishing is + # attempted on the 
least infeasible solution found. + with warns(UserWarning): + res = solver.solve() + + assert res.maxcv > 0 + assert not res.success + + # test _promote_lowest_energy works when none of the population is + # feasible. In this case, the solution with the lowest constraint + # violation should be promoted. + solver = DifferentialEvolutionSolver( + rosen, [(0, 2), (0, 2)], constraints=(nlc,), polish=False) + next(solver) + assert not solver.feasible.all() + assert not np.isfinite(solver.population_energies).all() + + # now swap two of the entries in the population + l = 20 + cv = solver.constraint_violation[0] + + solver.population_energies[[0, l]] = solver.population_energies[[l, 0]] + solver.population[[0, l], :] = solver.population[[l, 0], :] + solver.constraint_violation[[0, l], :] = ( + solver.constraint_violation[[l, 0], :]) + + solver._promote_lowest_energy() + assert_equal(solver.constraint_violation[0], cv) + + def test_accept_trial(self): + # _accept_trial(self, energy_trial, feasible_trial, cv_trial, + # energy_orig, feasible_orig, cv_orig) + def constr_f(x): + return [x[0] + x[1]] + nlc = NonlinearConstraint(constr_f, -np.inf, 1.9) + solver = DifferentialEvolutionSolver(rosen, [(0, 2), (0, 2)], + constraints=(nlc,)) + fn = solver._accept_trial + # both solutions are feasible, select lower energy + assert fn(0.1, True, np.array([0.]), 1.0, True, np.array([0.])) + assert (fn(1.0, True, np.array([0.0]), 0.1, True, np.array([0.0])) is False) + assert fn(0.1, True, np.array([0.]), 0.1, True, np.array([0.])) + + # trial is feasible, original is not + assert fn(9.9, True, np.array([0.]), 1.0, False, np.array([1.])) + + # trial and original are infeasible + # cv_trial have to be <= cv_original to be better + assert (fn(0.1, False, np.array([0.5, 0.5]), + 1.0, False, np.array([1., 1.0]))) + assert (fn(0.1, False, np.array([0.5, 0.5]), + 1.0, False, np.array([1., 0.50]))) + assert not (fn(1.0, False, np.array([0.5, 0.5]), + 1.0, False, np.array([1.0, 0.4]))) + + def test_constraint_wrapper(self): + lb = np.array([0, 20, 30]) + ub = np.array([0.5, np.inf, 70]) + x0 = np.array([1, 2, 3]) + pc = _ConstraintWrapper(Bounds(lb, ub), x0) + assert (pc.violation(x0) > 0).any() + assert (pc.violation([0.25, 21, 31]) == 0).all() + + # check vectorized Bounds constraint + xs = np.arange(1, 16).reshape(5, 3) + violations = [] + for x in xs: + violations.append(pc.violation(x)) + np.testing.assert_allclose(pc.violation(xs.T), np.array(violations).T) + + x0 = np.array([1, 2, 3, 4]) + A = np.array([[1, 2, 3, 4], [5, 0, 0, 6], [7, 0, 8, 0]]) + pc = _ConstraintWrapper(LinearConstraint(A, -np.inf, 0), x0) + assert (pc.violation(x0) > 0).any() + assert (pc.violation([-10, 2, -10, 4]) == 0).all() + + # check vectorized LinearConstraint, for 7 lots of parameter vectors + # with each parameter vector being 4 long, with 3 constraints + # xs is the same shape as stored in the differential evolution + # population, but it's sent to the violation function as (len(x), M) + xs = np.arange(1, 29).reshape(7, 4) + violations = [] + for x in xs: + violations.append(pc.violation(x)) + np.testing.assert_allclose(pc.violation(xs.T), np.array(violations).T) + + pc = _ConstraintWrapper(LinearConstraint(csr_matrix(A), -np.inf, 0), + x0) + assert (pc.violation(x0) > 0).any() + assert (pc.violation([-10, 2, -10, 4]) == 0).all() + + def fun(x): + return A.dot(x) + + nonlinear = NonlinearConstraint(fun, -np.inf, 0) + pc = _ConstraintWrapper(nonlinear, [-10, 2, -10, 4]) + assert (pc.violation(x0) > 0).any() + assert 
(pc.violation([-10, 2, -10, 4]) == 0).all() + + def test_constraint_wrapper_violation(self): + def cons_f(x): + # written in vectorised form to accept an array of (N, S) + # returning (M, S) + # where N is the number of parameters, + # S is the number of solution vectors to be examined, + # and M is the number of constraint components + return np.array([x[0] ** 2 + x[1], + x[0] ** 2 - x[1]]) + + nlc = NonlinearConstraint(cons_f, [-1, -0.8500], [2, 2]) + pc = _ConstraintWrapper(nlc, [0.5, 1]) + assert np.size(pc.bounds[0]) == 2 + + xs = [(0.5, 1), (0.5, 1.2), (1.2, 1.2), (0.1, -1.2), (0.1, 2.0)] + vs = [(0, 0), (0, 0.1), (0.64, 0), (0.19, 0), (0.01, 1.14)] + + for x, v in zip(xs, vs): + assert_allclose(pc.violation(x), v) + + # now check that we can vectorize the constraint wrapper + assert_allclose(pc.violation(np.array(xs).T), + np.array(vs).T) + assert pc.fun(np.array(xs).T).shape == (2, len(xs)) + assert pc.violation(np.array(xs).T).shape == (2, len(xs)) + assert pc.num_constr == 2 + assert pc.parameter_count == 2 + + def test_matrix_linear_constraint(self): + # gh20041 supplying an np.matrix to construct a LinearConstraint caused + # _ConstraintWrapper to start returning constraint violations of the + # wrong shape. + with suppress_warnings() as sup: + sup.filter(PendingDeprecationWarning) + matrix = np.matrix([[1, 1, 1, 1.], + [2, 2, 2, 2.]]) + lc = LinearConstraint(matrix, 0, 1) + x0 = np.ones(4) + cw = _ConstraintWrapper(lc, x0) + # the shape of the constraint violation should be the same as the number + # of constraints applied. + assert cw.violation(x0).shape == (2,) + + # let's try a vectorised violation call. + xtrial = np.arange(4 * 5).reshape(4, 5) + assert cw.violation(xtrial).shape == (2, 5) + + @pytest.mark.fail_slow(10) + def test_L1(self): + # Lampinen ([5]) test problem 1 + + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = np.sum(5*x[1:5]) - 5*x[1:5]@x[1:5] - np.sum(x[5:]) + return fun + + A = np.zeros((10, 14)) # 1-indexed to match reference + A[1, [1, 2, 10, 11]] = 2, 2, 1, 1 + A[2, [1, 10]] = -8, 1 + A[3, [4, 5, 10]] = -2, -1, 1 + A[4, [1, 3, 10, 11]] = 2, 2, 1, 1 + A[5, [2, 11]] = -8, 1 + A[6, [6, 7, 11]] = -2, -1, 1 + A[7, [2, 3, 11, 12]] = 2, 2, 1, 1 + A[8, [3, 12]] = -8, 1 + A[9, [8, 9, 12]] = -2, -1, 1 + A = A[1:, 1:] + + b = np.array([10, 0, 0, 10, 0, 0, 10, 0, 0]) + + L = LinearConstraint(A, -np.inf, b) + + bounds = [(0, 1)]*9 + [(0, 100)]*3 + [(0, 1)] + + # using a lower popsize to speed the test up + res = differential_evolution( + f, bounds, strategy='best1bin', seed=1234, constraints=(L,), + popsize=2, tol=0.05 + ) + + x_opt = (1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1) + f_opt = -15 + + assert_allclose(f(x_opt), f_opt, atol=6e-4) + assert res.success + assert_allclose(res.x, x_opt, atol=6e-4) + assert_allclose(res.fun, f_opt, atol=5e-3) + assert_(np.all(A@res.x <= b)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + # now repeat the same solve, using the same overall constraints, + # but using a sparse matrix for the LinearConstraint instead of an + # array + + L = LinearConstraint(csr_matrix(A), -np.inf, b) + + # using a lower popsize to speed the test up + res = differential_evolution( + f, bounds, strategy='best1bin', seed=1234, constraints=(L,), + popsize=2, tol=0.05 + ) + + assert_allclose(f(x_opt), f_opt) + assert res.success + assert_allclose(res.x, x_opt, atol=5e-4) + assert_allclose(res.fun, f_opt, atol=5e-3) + assert_(np.all(A@res.x <= b)) + assert_(np.all(res.x >= 
np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + # now repeat the same solve, using the same overall constraints, + # but specify half the constraints in terms of LinearConstraint, + # and the other half by NonlinearConstraint + def c1(x): + x = np.hstack(([0], x)) + return [2*x[2] + 2*x[3] + x[11] + x[12], + -8*x[3] + x[12]] + + def c2(x): + x = np.hstack(([0], x)) + return -2*x[8] - x[9] + x[12] + + L = LinearConstraint(A[:5, :], -np.inf, b[:5]) + L2 = LinearConstraint(A[5:6, :], -np.inf, b[5:6]) + N = NonlinearConstraint(c1, -np.inf, b[6:8]) + N2 = NonlinearConstraint(c2, -np.inf, b[8:9]) + constraints = (L, N, L2, N2) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + res = differential_evolution( + f, bounds, strategy='best1bin', seed=1234, + constraints=constraints, popsize=2, tol=0.05 + ) + + assert_allclose(res.x, x_opt, atol=6e-4) + assert_allclose(res.fun, f_opt, atol=5e-3) + assert_(np.all(A@res.x <= b)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L2(self): + # Lampinen ([5]) test problem 2 + + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = ((x[1]-10)**2 + 5*(x[2]-12)**2 + x[3]**4 + 3*(x[4]-11)**2 + + 10*x[5]**6 + 7*x[6]**2 + x[7]**4 - 4*x[6]*x[7] - 10*x[6] - + 8*x[7]) + return fun + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [127 - 2*x[1]**2 - 3*x[2]**4 - x[3] - 4*x[4]**2 - 5*x[5], + 196 - 23*x[1] - x[2]**2 - 6*x[6]**2 + 8*x[7], + 282 - 7*x[1] - 3*x[2] - 10*x[3]**2 - x[4] + x[5], + -4*x[1]**2 - x[2]**2 + 3*x[1]*x[2] - 2*x[3]**2 - + 5*x[6] + 11*x[7]] + + N = NonlinearConstraint(c1, 0, np.inf) + bounds = [(-10, 10)]*7 + constraints = (N) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + res = differential_evolution(f, bounds, strategy='best1bin', + seed=1234, constraints=constraints) + + f_opt = 680.6300599487869 + x_opt = (2.330499, 1.951372, -0.4775414, 4.365726, + -0.6244870, 1.038131, 1.594227) + + assert_allclose(f(x_opt), f_opt) + assert_allclose(res.fun, f_opt) + assert_allclose(res.x, x_opt, atol=1e-5) + assert res.success + assert_(np.all(np.array(c1(res.x)) >= 0)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L3(self): + # Lampinen ([5]) test problem 3 + + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = (x[1]**2 + x[2]**2 + x[1]*x[2] - 14*x[1] - 16*x[2] + + (x[3]-10)**2 + 4*(x[4]-5)**2 + (x[5]-3)**2 + 2*(x[6]-1)**2 + + 5*x[7]**2 + 7*(x[8]-11)**2 + 2*(x[9]-10)**2 + + (x[10] - 7)**2 + 45 + ) + return fun # maximize + + A = np.zeros((4, 11)) + A[1, [1, 2, 7, 8]] = -4, -5, 3, -9 + A[2, [1, 2, 7, 8]] = -10, 8, 17, -2 + A[3, [1, 2, 9, 10]] = 8, -2, -5, 2 + A = A[1:, 1:] + b = np.array([-105, 0, -12]) + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [3*x[1] - 6*x[2] - 12*(x[9]-8)**2 + 7*x[10], + -3*(x[1]-2)**2 - 4*(x[2]-3)**2 - 2*x[3]**2 + 7*x[4] + 120, + -x[1]**2 - 2*(x[2]-2)**2 + 2*x[1]*x[2] - 14*x[5] + 6*x[6], + -5*x[1]**2 - 8*x[2] - (x[3]-6)**2 + 2*x[4] + 40, + -0.5*(x[1]-8)**2 - 2*(x[2]-4)**2 - 3*x[5]**2 + x[6] + 30] + + L = LinearConstraint(A, b, np.inf) + N = NonlinearConstraint(c1, 0, np.inf) + bounds = [(-10, 10)]*10 + constraints = (L, N) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + res = differential_evolution(f, bounds, seed=1234, + constraints=constraints, popsize=3) + + 
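+        # (UserWarnings emitted during the 'trust-constr' polishing step are
+        # suppressed here so they do not fail the test; the quality of the
+        # constrained solution is asserted directly below instead.)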
x_opt = (2.171996, 2.363683, 8.773926, 5.095984, 0.9906548, + 1.430574, 1.321644, 9.828726, 8.280092, 8.375927) + f_opt = 24.3062091 + + assert_allclose(f(x_opt), f_opt, atol=1e-5) + assert_allclose(res.x, x_opt, atol=1e-6) + assert_allclose(res.fun, f_opt, atol=1e-5) + assert res.success + assert_(np.all(A @ res.x >= b)) + assert_(np.all(np.array(c1(res.x)) >= 0)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L4(self): + # Lampinen ([5]) test problem 4 + def f(x): + return np.sum(x[:3]) + + A = np.zeros((4, 9)) + A[1, [4, 6]] = 0.0025, 0.0025 + A[2, [5, 7, 4]] = 0.0025, 0.0025, -0.0025 + A[3, [8, 5]] = 0.01, -0.01 + A = A[1:, 1:] + b = np.array([1, 1, 1]) + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [x[1]*x[6] - 833.33252*x[4] - 100*x[1] + 83333.333, + x[2]*x[7] - 1250*x[5] - x[2]*x[4] + 1250*x[4], + x[3]*x[8] - 1250000 - x[3]*x[5] + 2500*x[5]] + + L = LinearConstraint(A, -np.inf, 1) + N = NonlinearConstraint(c1, 0, np.inf) + + bounds = [(100, 10000)] + [(1000, 10000)]*2 + [(10, 1000)]*5 + constraints = (L, N) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + res = differential_evolution( + f, bounds, strategy='best1bin', seed=1234, + constraints=constraints, popsize=3, tol=0.05 + ) + + f_opt = 7049.248 + + x_opt = [579.306692, 1359.97063, 5109.9707, 182.0177, 295.601172, + 217.9823, 286.416528, 395.601172] + + assert_allclose(f(x_opt), f_opt, atol=0.001) + assert_allclose(res.fun, f_opt, atol=0.001) + + # use higher tol here for 32-bit Windows, see gh-11693 + if (platform.system() == 'Windows' and np.dtype(np.intp).itemsize < 8): + assert_allclose(res.x, x_opt, rtol=2.4e-6, atol=0.0035) + else: + # tolerance determined from macOS + MKL failure, see gh-12701 + assert_allclose(res.x, x_opt, rtol=5e-6, atol=0.0024) + + assert res.success + assert_(np.all(A @ res.x <= b)) + assert_(np.all(np.array(c1(res.x)) >= 0)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L5(self): + # Lampinen ([5]) test problem 5 + + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = (np.sin(2*np.pi*x[1])**3*np.sin(2*np.pi*x[2]) / + (x[1]**3*(x[1]+x[2]))) + return -fun # maximize + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [x[1]**2 - x[2] + 1, + 1 - x[1] + (x[2]-4)**2] + + N = NonlinearConstraint(c1, -np.inf, 0) + bounds = [(0, 10)]*2 + constraints = (N) + + res = differential_evolution(f, bounds, strategy='rand1bin', seed=1234, + constraints=constraints) + + x_opt = (1.22797135, 4.24537337) + f_opt = -0.095825 + assert_allclose(f(x_opt), f_opt, atol=2e-5) + assert_allclose(res.fun, f_opt, atol=1e-4) + assert res.success + assert_(np.all(np.array(c1(res.x)) <= 0)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L6(self): + # Lampinen ([5]) test problem 6 + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = (x[1]-10)**3 + (x[2] - 20)**3 + return fun + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [(x[1]-5)**2 + (x[2] - 5)**2 - 100, + -(x[1]-6)**2 - (x[2] - 5)**2 + 82.81] + + N = NonlinearConstraint(c1, 0, np.inf) + bounds = [(13, 100), (0, 100)] + constraints = (N) + res = differential_evolution(f, bounds, strategy='rand1bin', seed=1234, + constraints=constraints, 
tol=1e-7) + x_opt = (14.095, 0.84296) + f_opt = -6961.814744 + + assert_allclose(f(x_opt), f_opt, atol=1e-6) + assert_allclose(res.fun, f_opt, atol=0.001) + assert_allclose(res.x, x_opt, atol=1e-4) + assert res.success + assert_(np.all(np.array(c1(res.x)) >= 0)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + def test_L7(self): + # Lampinen ([5]) test problem 7 + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = (5.3578547*x[3]**2 + 0.8356891*x[1]*x[5] + + 37.293239*x[1] - 40792.141) + return fun + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [ + 85.334407 + 0.0056858*x[2]*x[5] + 0.0006262*x[1]*x[4] - + 0.0022053*x[3]*x[5], + + 80.51249 + 0.0071317*x[2]*x[5] + 0.0029955*x[1]*x[2] + + 0.0021813*x[3]**2, + + 9.300961 + 0.0047026*x[3]*x[5] + 0.0012547*x[1]*x[3] + + 0.0019085*x[3]*x[4] + ] + + N = NonlinearConstraint(c1, [0, 90, 20], [92, 110, 25]) + + bounds = [(78, 102), (33, 45)] + [(27, 45)]*3 + constraints = (N) + + res = differential_evolution(f, bounds, strategy='rand1bin', seed=1234, + constraints=constraints) + + # using our best solution, rather than Lampinen/Koziel. Koziel solution + # doesn't satisfy constraints, Lampinen f_opt just plain wrong. + x_opt = [78.00000686, 33.00000362, 29.99526064, 44.99999971, + 36.77579979] + + f_opt = -30665.537578 + + assert_allclose(f(x_opt), f_opt) + assert_allclose(res.x, x_opt, atol=1e-3) + assert_allclose(res.fun, f_opt, atol=1e-3) + + assert res.success + assert_(np.all(np.array(c1(res.x)) >= np.array([0, 90, 20]))) + assert_(np.all(np.array(c1(res.x)) <= np.array([92, 110, 25]))) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.xslow + @pytest.mark.xfail(platform.machine() == 'ppc64le', + reason="fails on ppc64le") + def test_L8(self): + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + fun = 3*x[1] + 0.000001*x[1]**3 + 2*x[2] + 0.000002/3*x[2]**3 + return fun + + A = np.zeros((3, 5)) + A[1, [4, 3]] = 1, -1 + A[2, [3, 4]] = 1, -1 + A = A[1:, 1:] + b = np.array([-.55, -.55]) + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [ + 1000*np.sin(-x[3]-0.25) + 1000*np.sin(-x[4]-0.25) + + 894.8 - x[1], + 1000*np.sin(x[3]-0.25) + 1000*np.sin(x[3]-x[4]-0.25) + + 894.8 - x[2], + 1000*np.sin(x[4]-0.25) + 1000*np.sin(x[4]-x[3]-0.25) + + 1294.8 + ] + L = LinearConstraint(A, b, np.inf) + N = NonlinearConstraint(c1, np.full(3, -0.001), np.full(3, 0.001)) + + bounds = [(0, 1200)]*2+[(-.55, .55)]*2 + constraints = (L, N) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + # original Lampinen test was with rand1bin, but that takes a + # huge amount of CPU time. 
Changing strategy to best1bin speeds + # things up a lot + res = differential_evolution(f, bounds, strategy='best1bin', + seed=1234, constraints=constraints, + maxiter=5000) + + x_opt = (679.9453, 1026.067, 0.1188764, -0.3962336) + f_opt = 5126.4981 + + assert_allclose(f(x_opt), f_opt, atol=1e-3) + assert_allclose(res.x[:2], x_opt[:2], atol=2e-3) + assert_allclose(res.x[2:], x_opt[2:], atol=2e-3) + assert_allclose(res.fun, f_opt, atol=2e-2) + assert res.success + assert_(np.all(A@res.x >= b)) + assert_(np.all(np.array(c1(res.x)) >= -0.001)) + assert_(np.all(np.array(c1(res.x)) <= 0.001)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_L9(self): + # Lampinen ([5]) test problem 9 + + def f(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return x[1]**2 + (x[2]-1)**2 + + def c1(x): + x = np.hstack(([0], x)) # 1-indexed to match reference + return [x[2] - x[1]**2] + + N = NonlinearConstraint(c1, [-.001], [0.001]) + + bounds = [(-1, 1)]*2 + constraints = (N) + res = differential_evolution(f, bounds, strategy='rand1bin', seed=1234, + constraints=constraints) + + x_opt = [np.sqrt(2)/2, 0.5] + f_opt = 0.75 + + assert_allclose(f(x_opt), f_opt) + assert_allclose(np.abs(res.x), x_opt, atol=1e-3) + assert_allclose(res.fun, f_opt, atol=1e-3) + assert res.success + assert_(np.all(np.array(c1(res.x)) >= -0.001)) + assert_(np.all(np.array(c1(res.x)) <= 0.001)) + assert_(np.all(res.x >= np.array(bounds)[:, 0])) + assert_(np.all(res.x <= np.array(bounds)[:, 1])) + + @pytest.mark.fail_slow(5) + def test_integrality(self): + # test fitting discrete distribution to data + rng = np.random.default_rng(6519843218105) + dist = stats.nbinom + shapes = (5, 0.5) + x = dist.rvs(*shapes, size=10000, random_state=rng) + + def func(p, *args): + dist, x = args + # negative log-likelihood function + ll = -np.log(dist.pmf(x, *p)).sum(axis=-1) + if np.isnan(ll): # occurs when x is outside of support + ll = np.inf # we don't want that + return ll + + integrality = [True, False] + bounds = [(1, 18), (0, 0.95)] + + res = differential_evolution(func, bounds, args=(dist, x), + integrality=integrality, polish=False, + seed=rng) + # tolerance has to be fairly relaxed for the second parameter + # because we're fitting a distribution to random variates. + assert res.x[0] == 5 + assert_allclose(res.x, shapes, rtol=0.025) + + # check that we can still use integrality constraints with polishing + res2 = differential_evolution(func, bounds, args=(dist, x), + integrality=integrality, polish=True, + seed=rng) + + def func2(p, *args): + n, dist, x = args + return func(np.array([n, p[0]]), dist, x) + + # compare the DE derived solution to an LBFGSB solution (that doesn't + # have to find the integral values). Note we're setting x0 to be the + # output from the first DE result, thereby making the polishing step + # and this minimisation pretty much equivalent. 
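+        # (That is: with the integer parameter pinned at n=5, polishing only
+        # has the continuous parameter left to adjust, so the L-BFGS-B run
+        # below, started from the polished DE solution, should land on
+        # essentially the same optimum.)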
+ LBFGSB = minimize(func2, res2.x[1], args=(5, dist, x), + bounds=[(0, 0.95)]) + assert_allclose(res2.x[1], LBFGSB.x) + assert res2.fun <= res.fun + + def test_integrality_limits(self): + def f(x): + return x + + integrality = [True, False, True] + bounds = [(0.2, 1.1), (0.9, 2.2), (3.3, 4.9)] + + # no integrality constraints + solver = DifferentialEvolutionSolver(f, bounds=bounds, polish=False, + integrality=False) + assert_allclose(solver.limits[0], [0.2, 0.9, 3.3]) + assert_allclose(solver.limits[1], [1.1, 2.2, 4.9]) + + # with integrality constraints + solver = DifferentialEvolutionSolver(f, bounds=bounds, polish=False, + integrality=integrality) + assert_allclose(solver.limits[0], [0.5, 0.9, 3.5]) + assert_allclose(solver.limits[1], [1.5, 2.2, 4.5]) + assert_equal(solver.integrality, [True, False, True]) + assert solver.polish is False + + bounds = [(-1.2, -0.9), (0.9, 2.2), (-10.3, 4.1)] + solver = DifferentialEvolutionSolver(f, bounds=bounds, polish=False, + integrality=integrality) + assert_allclose(solver.limits[0], [-1.5, 0.9, -10.5]) + assert_allclose(solver.limits[1], [-0.5, 2.2, 4.5]) + + # A lower bound of -1.2 is converted to + # np.nextafter(np.ceil(-1.2) - 0.5, np.inf) + # with a similar process to the upper bound. Check that the + # conversions work + assert_allclose(np.round(solver.limits[0]), [-1.0, 1.0, -10.0]) + assert_allclose(np.round(solver.limits[1]), [-1.0, 2.0, 4.0]) + + bounds = [(-10.2, -8.1), (0.9, 2.2), (-10.9, -9.9999)] + solver = DifferentialEvolutionSolver(f, bounds=bounds, polish=False, + integrality=integrality) + assert_allclose(solver.limits[0], [-10.5, 0.9, -10.5]) + assert_allclose(solver.limits[1], [-8.5, 2.2, -9.5]) + + bounds = [(-10.2, -10.1), (0.9, 2.2), (-10.9, -9.9999)] + with pytest.raises(ValueError, match='One of the integrality'): + DifferentialEvolutionSolver(f, bounds=bounds, polish=False, + integrality=integrality) + + @pytest.mark.fail_slow(5) + def test_vectorized(self): + def quadratic(x): + return np.sum(x**2) + + def quadratic_vec(x): + return np.sum(x**2, axis=0) + + # A vectorized function needs to accept (len(x), S) and return (S,) + with pytest.raises(RuntimeError, match='The vectorized function'): + differential_evolution(quadratic, self.bounds, + vectorized=True, updating='deferred') + + # vectorized overrides the updating keyword, check for warning + with warns(UserWarning, match="differential_evolution: the 'vector"): + differential_evolution(quadratic_vec, self.bounds, + vectorized=True) + + # vectorized defers to the workers keyword, check for warning + with warns(UserWarning, match="differential_evolution: the 'workers"): + differential_evolution(quadratic_vec, self.bounds, + vectorized=True, workers=map, + updating='deferred') + + ncalls = [0] + + def rosen_vec(x): + ncalls[0] += 1 + return rosen(x) + + bounds = [(0, 10), (0, 10)] + res1 = differential_evolution(rosen, bounds, updating='deferred', + seed=1) + res2 = differential_evolution(rosen_vec, bounds, vectorized=True, + updating='deferred', seed=1) + + # the two minimisation runs should be functionally equivalent + assert_allclose(res1.x, res2.x) + assert ncalls[0] == res2.nfev + assert res1.nit == res2.nit + + def test_vectorized_constraints(self): + def constr_f(x): + return np.array([x[0] + x[1]]) + + def constr_f2(x): + return np.array([x[0]**2 + x[1], x[0] - x[1]]) + + nlc1 = NonlinearConstraint(constr_f, -np.inf, 1.9) + nlc2 = NonlinearConstraint(constr_f2, (0.9, 0.5), (2.0, 2.0)) + + def rosen_vec(x): + # accept an (len(x0), S) array, returning a (S,) 
array + v = 100 * (x[1:] - x[:-1]**2.0)**2.0 + v += (1 - x[:-1])**2.0 + return np.squeeze(v) + + bounds = [(0, 10), (0, 10)] + + res1 = differential_evolution(rosen, bounds, updating='deferred', + seed=1, constraints=[nlc1, nlc2], + polish=False) + res2 = differential_evolution(rosen_vec, bounds, vectorized=True, + updating='deferred', seed=1, + constraints=[nlc1, nlc2], + polish=False) + # the two minimisation runs should be functionally equivalent + assert_allclose(res1.x, res2.x) + + def test_constraint_violation_error_message(self): + + def func(x): + return np.cos(x[0]) + np.sin(x[1]) + + # Intentionally infeasible constraints. + c0 = NonlinearConstraint(lambda x: x[1] - (x[0]-1)**2, 0, np.inf) + c1 = NonlinearConstraint(lambda x: x[1] + x[0]**2, -np.inf, 0) + + result = differential_evolution(func, + bounds=[(-1, 2), (-1, 1)], + constraints=[c0, c1], + maxiter=10, + polish=False, + seed=864197532) + assert result.success is False + # The numerical value in the error message might be sensitive to + # changes in the implementation. It can be updated if the code is + # changed. The essential part of the test is that there is a number + # after the '=', so if necessary, the text could be reduced to, say, + # "MAXCV = 0.". + assert "MAXCV = 0.4" in result.message + + @pytest.mark.fail_slow(10) # fail-slow exception by request - see gh-20806 + def test_strategy_fn(self): + # examines ability to customize strategy by mimicking one of the + # in-built strategies + parameter_count = 4 + popsize = 10 + bounds = [(0, 10.)] * parameter_count + total_popsize = parameter_count * popsize + mutation = 0.8 + recombination = 0.7 + + calls = [0] + def custom_strategy_fn(candidate, population, rng=None): + calls[0] += 1 + trial = np.copy(population[candidate]) + fill_point = rng.choice(parameter_count) + + pool = np.arange(total_popsize) + rng.shuffle(pool) + idxs = pool[:2 + 1] + idxs = idxs[idxs != candidate][:2] + + r0, r1 = idxs[:2] + + bprime = (population[0] + mutation * + (population[r0] - population[r1])) + + crossovers = rng.uniform(size=parameter_count) + crossovers = crossovers < recombination + crossovers[fill_point] = True + trial = np.where(crossovers, bprime, trial) + return trial + + solver = DifferentialEvolutionSolver( + rosen, + bounds, + popsize=popsize, + recombination=recombination, + mutation=mutation, + maxiter=2, + strategy=custom_strategy_fn, + seed=10, + polish=False + ) + assert solver.strategy is custom_strategy_fn + solver.solve() + assert calls[0] > 0 + + # check custom strategy works with updating='deferred' + res = differential_evolution( + rosen, bounds, strategy=custom_strategy_fn, updating='deferred' + ) + assert res.success + + def custom_strategy_fn(candidate, population, rng=None): + return np.array([1.0, 2.0]) + + with pytest.raises(RuntimeError, match="strategy*"): + differential_evolution( + rosen, + bounds, + strategy=custom_strategy_fn + ) + diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__dual_annealing.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__dual_annealing.py new file mode 100644 index 0000000000000000000000000000000000000000..041dffc5b5096c89e96eac19847ec02c31d29de0 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__dual_annealing.py @@ -0,0 +1,406 @@ +# Dual annealing unit tests implementation. 
+# Copyright (c) 2018 Sylvain Gubian,
+# Yang Xiang
+# Author: Sylvain Gubian, PMP S.A.
+"""
+Unit tests for the dual annealing global optimizer
+"""
+from scipy.optimize import dual_annealing, Bounds
+
+from scipy.optimize._dual_annealing import EnergyState
+from scipy.optimize._dual_annealing import LocalSearchWrapper
+from scipy.optimize._dual_annealing import ObjectiveFunWrapper
+from scipy.optimize._dual_annealing import StrategyChain
+from scipy.optimize._dual_annealing import VisitingDistribution
+from scipy.optimize import rosen, rosen_der
+import pytest
+import numpy as np
+from numpy.testing import assert_equal, assert_allclose, assert_array_less
+from pytest import raises as assert_raises
+from scipy._lib._util import check_random_state
+
+
+class TestDualAnnealing:
+
+    def setup_method(self):
+        # A function that always returns infinity for initialization tests
+        self.weirdfunc = lambda x: np.inf
+        # 2-D bounds for testing function
+        self.ld_bounds = [(-5.12, 5.12)] * 2
+        # 4-D bounds for testing function
+        self.hd_bounds = self.ld_bounds * 4
+        # Number of values to be generated for testing visit function
+        self.nbtestvalues = 5000
+        self.high_temperature = 5230
+        self.low_temperature = 0.1
+        self.qv = 2.62
+        self.seed = 1234
+        self.rs = check_random_state(self.seed)
+        self.nb_fun_call = 0
+        self.ngev = 0
+
+    def callback(self, x, f, context):
+        # For testing callback mechanism. Should stop when f <= 1 as
+        # the callback function returns True
+        if f <= 1.0:
+            return True
+
+    def func(self, x, args=()):
+        # Using Rastrigin function for performing tests
+        if args:
+            shift = args
+        else:
+            shift = 0
+        y = np.sum((x - shift) ** 2 - 10 * np.cos(2 * np.pi * (
+            x - shift))) + 10 * np.size(x) + shift
+        self.nb_fun_call += 1
+        return y
+
+    def rosen_der_wrapper(self, x, args=()):
+        self.ngev += 1
+        return rosen_der(x, *args)
+
+    # FIXME: there are some discontinuities in behaviour as a function of `qv`,
+    # this needs investigating - see gh-12384
+    @pytest.mark.parametrize('qv', [1.1, 1.41, 2, 2.62, 2.9])
+    def test_visiting_stepping(self, qv):
+        lu = list(zip(*self.ld_bounds))
+        lower = np.array(lu[0])
+        upper = np.array(lu[1])
+        dim = lower.size
+        vd = VisitingDistribution(lower, upper, qv, self.rs)
+        values = np.zeros(dim)
+        x_step_low = vd.visiting(values, 0, self.high_temperature)
+        # Make sure that only the first component is changed
+        assert_equal(np.not_equal(x_step_low, 0), True)
+        values = np.zeros(dim)
+        x_step_high = vd.visiting(values, dim, self.high_temperature)
+        # Make sure that components other than the one at dim have changed
+        assert_equal(np.not_equal(x_step_high[0], 0), True)
+
+    @pytest.mark.parametrize('qv', [2.25, 2.62, 2.9])
+    def test_visiting_dist_high_temperature(self, qv):
+        lu = list(zip(*self.ld_bounds))
+        lower = np.array(lu[0])
+        upper = np.array(lu[1])
+        vd = VisitingDistribution(lower, upper, qv, self.rs)
+        # values = np.zeros(self.nbtestvalues)
+        # for i in np.arange(self.nbtestvalues):
+        #     values[i] = vd.visit_fn(self.high_temperature)
+        values = vd.visit_fn(self.high_temperature, self.nbtestvalues)
+
+        # The visiting distribution is a distorted version of the
+        # Cauchy-Lorentz distribution, and has no 1st or higher moments
+        # (no mean defined, no variance defined).
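+        # (Because no finite moments exist, extreme draws are expected in
+        # any reasonably large sample; the tail checks below rely on this.)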
+ # Check that big tails values are generated + assert_array_less(np.min(values), 1e-10) + assert_array_less(1e+10, np.max(values)) + + def test_reset(self): + owf = ObjectiveFunWrapper(self.weirdfunc) + lu = list(zip(*self.ld_bounds)) + lower = np.array(lu[0]) + upper = np.array(lu[1]) + es = EnergyState(lower, upper) + assert_raises(ValueError, es.reset, owf, check_random_state(None)) + + def test_low_dim(self): + ret = dual_annealing( + self.func, self.ld_bounds, seed=self.seed) + assert_allclose(ret.fun, 0., atol=1e-12) + assert ret.success + + @pytest.mark.fail_slow(5) + def test_high_dim(self): + ret = dual_annealing(self.func, self.hd_bounds, seed=self.seed) + assert_allclose(ret.fun, 0., atol=1e-12) + assert ret.success + + def test_low_dim_no_ls(self): + ret = dual_annealing(self.func, self.ld_bounds, + no_local_search=True, seed=self.seed) + assert_allclose(ret.fun, 0., atol=1e-4) + + @pytest.mark.fail_slow(5) + def test_high_dim_no_ls(self): + ret = dual_annealing(self.func, self.hd_bounds, + no_local_search=True, seed=self.seed) + assert_allclose(ret.fun, 0., atol=1e-4) + + def test_nb_fun_call(self): + ret = dual_annealing(self.func, self.ld_bounds, seed=self.seed) + assert_equal(self.nb_fun_call, ret.nfev) + + def test_nb_fun_call_no_ls(self): + ret = dual_annealing(self.func, self.ld_bounds, + no_local_search=True, seed=self.seed) + assert_equal(self.nb_fun_call, ret.nfev) + + def test_max_reinit(self): + assert_raises(ValueError, dual_annealing, self.weirdfunc, + self.ld_bounds) + + @pytest.mark.fail_slow(5) + def test_reproduce(self): + res1 = dual_annealing(self.func, self.ld_bounds, seed=self.seed) + res2 = dual_annealing(self.func, self.ld_bounds, seed=self.seed) + res3 = dual_annealing(self.func, self.ld_bounds, seed=self.seed) + # If we have reproducible results, x components found has to + # be exactly the same, which is not the case with no seeding + assert_equal(res1.x, res2.x) + assert_equal(res1.x, res3.x) + + def test_rand_gen(self): + # check that np.random.Generator can be used (numpy >= 1.17) + # obtain a np.random.Generator object + rng = np.random.default_rng(1) + + res1 = dual_annealing(self.func, self.ld_bounds, seed=rng) + # seed again + rng = np.random.default_rng(1) + res2 = dual_annealing(self.func, self.ld_bounds, seed=rng) + # If we have reproducible results, x components found has to + # be exactly the same, which is not the case with no seeding + assert_equal(res1.x, res2.x) + + def test_bounds_integrity(self): + wrong_bounds = [(-5.12, 5.12), (1, 0), (5.12, 5.12)] + assert_raises(ValueError, dual_annealing, self.func, + wrong_bounds) + + def test_bound_validity(self): + invalid_bounds = [(-5, 5), (-np.inf, 0), (-5, 5)] + assert_raises(ValueError, dual_annealing, self.func, + invalid_bounds) + invalid_bounds = [(-5, 5), (0, np.inf), (-5, 5)] + assert_raises(ValueError, dual_annealing, self.func, + invalid_bounds) + invalid_bounds = [(-5, 5), (0, np.nan), (-5, 5)] + assert_raises(ValueError, dual_annealing, self.func, + invalid_bounds) + + def test_deprecated_local_search_options_bounds(self): + def func(x): + return np.sum((x - 5) * (x - 1)) + bounds = list(zip([-6, -5], [6, 5])) + # Test bounds can be passed (see gh-10831) + + with pytest.warns(RuntimeWarning, match=r"Method CG cannot handle "): + dual_annealing( + func, + bounds=bounds, + minimizer_kwargs={"method": "CG", "bounds": bounds}) + + def test_minimizer_kwargs_bounds(self): + def func(x): + return np.sum((x - 5) * (x - 1)) + bounds = list(zip([-6, -5], [6, 5])) + # Test bounds can be 
passed (see gh-10831)
+        dual_annealing(
+            func,
+            bounds=bounds,
+            minimizer_kwargs={"method": "SLSQP", "bounds": bounds})
+
+        with pytest.warns(RuntimeWarning, match=r"Method CG cannot handle "):
+            dual_annealing(
+                func,
+                bounds=bounds,
+                minimizer_kwargs={"method": "CG", "bounds": bounds})
+
+    def test_max_fun_ls(self):
+        ret = dual_annealing(self.func, self.ld_bounds, maxfun=100,
+                             seed=self.seed)
+
+        ls_max_iter = min(max(
+            len(self.ld_bounds) * LocalSearchWrapper.LS_MAXITER_RATIO,
+            LocalSearchWrapper.LS_MAXITER_MIN),
+            LocalSearchWrapper.LS_MAXITER_MAX)
+        assert ret.nfev <= 100 + ls_max_iter
+        assert not ret.success
+
+    def test_max_fun_no_ls(self):
+        ret = dual_annealing(self.func, self.ld_bounds,
+                             no_local_search=True, maxfun=500, seed=self.seed)
+        assert ret.nfev <= 500
+        assert not ret.success
+
+    def test_maxiter(self):
+        ret = dual_annealing(self.func, self.ld_bounds, maxiter=700,
+                             seed=self.seed)
+        assert ret.nit <= 700
+
+    # Testing that args are passed correctly for dual_annealing
+    def test_fun_args_ls(self):
+        ret = dual_annealing(self.func, self.ld_bounds,
+                             args=((3.14159,)), seed=self.seed)
+        assert_allclose(ret.fun, 3.14159, atol=1e-6)
+
+    # Testing that args are passed correctly for pure simulated annealing
+    def test_fun_args_no_ls(self):
+        ret = dual_annealing(self.func, self.ld_bounds,
+                             args=((3.14159, )), no_local_search=True,
+                             seed=self.seed)
+        assert_allclose(ret.fun, 3.14159, atol=1e-4)
+
+    def test_callback_stop(self):
+        # Testing that the callback makes the algorithm stop for
+        # fun value <= 1.0 (see callback method)
+        ret = dual_annealing(self.func, self.ld_bounds,
+                             callback=self.callback, seed=self.seed)
+        assert ret.fun <= 1.0
+        assert 'stop early' in ret.message[0]
+        assert not ret.success
+
+    @pytest.mark.parametrize('method, atol', [
+        ('Nelder-Mead', 2e-5),
+        ('COBYLA', 1e-5),
+        ('COBYQA', 1e-8),
+        ('Powell', 1e-8),
+        ('CG', 1e-8),
+        ('BFGS', 1e-8),
+        ('TNC', 1e-8),
+        ('SLSQP', 2e-7),
+    ])
+    def test_multi_ls_minimizer(self, method, atol):
+        ret = dual_annealing(self.func, self.ld_bounds,
+                             minimizer_kwargs=dict(method=method),
+                             seed=self.seed)
+        assert_allclose(ret.fun, 0., atol=atol)
+
+    def test_wrong_restart_temp(self):
+        assert_raises(ValueError, dual_annealing, self.func,
+                      self.ld_bounds, restart_temp_ratio=1)
+        assert_raises(ValueError, dual_annealing, self.func,
+                      self.ld_bounds, restart_temp_ratio=0)
+
+    def test_gradient_gnev(self):
+        minimizer_opts = {
+            'jac': self.rosen_der_wrapper,
+        }
+        ret = dual_annealing(rosen, self.ld_bounds,
+                             minimizer_kwargs=minimizer_opts,
+                             seed=self.seed)
+        assert ret.njev == self.ngev
+
+    @pytest.mark.fail_slow(5)
+    def test_from_docstring(self):
+        def func(x):
+            return np.sum(x * x - 10 * np.cos(2 * np.pi * x)) + 10 * np.size(x)
+        lw = [-5.12] * 10
+        up = [5.12] * 10
+        ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
+        assert_allclose(ret.x,
+                        [-4.26437714e-09, -3.91699361e-09, -1.86149218e-09,
+                         -3.97165720e-09, -6.29151648e-09, -6.53145322e-09,
+                         -3.93616815e-09, -6.55623025e-09, -6.05775280e-09,
+                         -5.00668935e-09], atol=4e-8)
+        assert_allclose(ret.fun, 0.000000, atol=5e-13)
+
+    @pytest.mark.parametrize('new_e, temp_step, accepted, accept_rate', [
+        (0, 100, 1000, 1.0097587941791923),
+        (0, 2, 1000, 1.2599210498948732),
+        (10, 100, 878, 0.8786035869128718),
+        (10, 60, 695, 0.6812920690579612),
+        (2, 100, 990, 0.9897404249173424),
+    ])
+    def test_accept_reject_probabilistic(
+            self, new_e, temp_step, accepted, accept_rate):
+        # Test that moves are accepted unconditionally when e < current_energy and
+        # 
probabilistically with e > current_energy + + rs = check_random_state(123) + + count_accepted = 0 + iterations = 1000 + + accept_param = -5 + current_energy = 1 + for _ in range(iterations): + energy_state = EnergyState(lower=None, upper=None) + # Set energy state with current_energy, any location. + energy_state.update_current(current_energy, [0]) + + chain = StrategyChain( + accept_param, None, None, None, rs, energy_state) + # Normally this is set in run() + chain.temperature_step = temp_step + + # Check if update is accepted. + chain.accept_reject(j=1, e=new_e, x_visit=[2]) + if energy_state.current_energy == new_e: + count_accepted += 1 + + assert count_accepted == accepted + + # Check accept rate + pqv = 1 - (1 - accept_param) * (new_e - current_energy) / temp_step + rate = 0 if pqv <= 0 else np.exp(np.log(pqv) / (1 - accept_param)) + + assert_allclose(rate, accept_rate) + + @pytest.mark.fail_slow(5) + def test_bounds_class(self): + # test that result does not depend on the bounds type + def func(x): + f = np.sum(x * x - 10 * np.cos(2 * np.pi * x)) + 10 * np.size(x) + return f + lw = [-5.12] * 5 + up = [5.12] * 5 + + # Unbounded global minimum is all zeros. Most bounds below will force + # a DV away from unbounded minimum and be active at solution. + up[0] = -2.0 + up[1] = -1.0 + lw[3] = 1.0 + lw[4] = 2.0 + + # run optimizations + bounds = Bounds(lw, up) + ret_bounds_class = dual_annealing(func, bounds=bounds, seed=1234) + + bounds_old = list(zip(lw, up)) + ret_bounds_list = dual_annealing(func, bounds=bounds_old, seed=1234) + + # test that found minima, function evaluations and iterations match + assert_allclose(ret_bounds_class.x, ret_bounds_list.x, atol=1e-8) + assert_allclose(ret_bounds_class.x, np.arange(-2, 3), atol=1e-7) + assert_allclose(ret_bounds_list.fun, ret_bounds_class.fun, atol=1e-9) + assert ret_bounds_list.nfev == ret_bounds_class.nfev + + @pytest.mark.fail_slow(5) + def test_callable_jac_hess_with_args_gh11052(self): + # dual_annealing used to fail when `jac` was callable and `args` were + # used; check that this is resolved. Example is from gh-11052. 
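+        # (The essential requirement: `args` must be forwarded not only to
+        # the objective but also to any callable jac/hess/hessp supplied via
+        # `minimizer_kwargs`; gh-11052 was a failure of exactly that
+        # forwarding for `jac`.)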
+ + # extended to hess as part of closing gh20614 + rng = np.random.default_rng(94253637693657847462) + def f(x, power): + return np.sum(np.exp(x ** power)) + + def jac(x, power): + return np.exp(x ** power) * power * x ** (power - 1) + + def hess(x, power): + # calculated using WolframAlpha as d^2/dx^2 e^(x^p) + return np.diag( + power * np.exp(x ** power) * x ** (power - 2) * + (power * x ** power + power - 1) + ) + + def hessp(x, p, power): + return hess(x, power) @ p + + res1 = dual_annealing(f, args=(2, ), bounds=[[0, 1], [0, 1]], seed=rng, + minimizer_kwargs=dict(method='L-BFGS-B')) + res2 = dual_annealing(f, args=(2, ), bounds=[[0, 1], [0, 1]], seed=rng, + minimizer_kwargs=dict(method='L-BFGS-B', + jac=jac)) + res3 = dual_annealing(f, args=(2, ), bounds=[[0, 1], [0, 1]], seed=rng, + minimizer_kwargs=dict(method='newton-cg', + jac=jac, hess=hess)) + res4 = dual_annealing(f, args=(2, ), bounds=[[0, 1], [0, 1]], seed=rng, + minimizer_kwargs=dict(method='newton-cg', + jac=jac, hessp=hessp)) + assert_allclose(res1.fun, res2.fun, rtol=1e-6) + assert_allclose(res3.fun, res2.fun, rtol=1e-6) + assert_allclose(res4.fun, res2.fun, rtol=1e-6) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__linprog_clean_inputs.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__linprog_clean_inputs.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0e4097bc9aadbfd3335aa3a86d063216f2c69a --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__linprog_clean_inputs.py @@ -0,0 +1,310 @@ +""" +Unit test for Linear Programming via Simplex Algorithm. +""" +import numpy as np +from numpy.testing import assert_, assert_allclose, assert_equal +from pytest import raises as assert_raises +from scipy.optimize._linprog_util import _clean_inputs, _LPProblem +from scipy._lib._util import VisibleDeprecationWarning +from copy import deepcopy +from datetime import date + + +def test_aliasing(): + """ + Test for ensuring that no objects referred to by `lp` attributes, + `c`, `A_ub`, `b_ub`, `A_eq`, `b_eq`, `bounds`, have been modified + by `_clean_inputs` as a side effect. + """ + lp = _LPProblem( + c=1, + A_ub=[[1]], + b_ub=[1], + A_eq=[[1]], + b_eq=[1], + bounds=(-np.inf, np.inf) + ) + lp_copy = deepcopy(lp) + + _clean_inputs(lp) + + assert_(lp.c == lp_copy.c, "c modified by _clean_inputs") + assert_(lp.A_ub == lp_copy.A_ub, "A_ub modified by _clean_inputs") + assert_(lp.b_ub == lp_copy.b_ub, "b_ub modified by _clean_inputs") + assert_(lp.A_eq == lp_copy.A_eq, "A_eq modified by _clean_inputs") + assert_(lp.b_eq == lp_copy.b_eq, "b_eq modified by _clean_inputs") + assert_(lp.bounds == lp_copy.bounds, "bounds modified by _clean_inputs") + + +def test_aliasing2(): + """ + Similar purpose as `test_aliasing` above. 
+ """ + lp = _LPProblem( + c=np.array([1, 1]), + A_ub=np.array([[1, 1], [2, 2]]), + b_ub=np.array([[1], [1]]), + A_eq=np.array([[1, 1]]), + b_eq=np.array([1]), + bounds=[(-np.inf, np.inf), (None, 1)] + ) + lp_copy = deepcopy(lp) + + _clean_inputs(lp) + + assert_allclose(lp.c, lp_copy.c, err_msg="c modified by _clean_inputs") + assert_allclose(lp.A_ub, lp_copy.A_ub, err_msg="A_ub modified by _clean_inputs") + assert_allclose(lp.b_ub, lp_copy.b_ub, err_msg="b_ub modified by _clean_inputs") + assert_allclose(lp.A_eq, lp_copy.A_eq, err_msg="A_eq modified by _clean_inputs") + assert_allclose(lp.b_eq, lp_copy.b_eq, err_msg="b_eq modified by _clean_inputs") + assert_(lp.bounds == lp_copy.bounds, "bounds modified by _clean_inputs") + + +def test_missing_inputs(): + c = [1, 2] + A_ub = np.array([[1, 1], [2, 2]]) + b_ub = np.array([1, 1]) + A_eq = np.array([[1, 1], [2, 2]]) + b_eq = np.array([1, 1]) + + assert_raises(TypeError, _clean_inputs) + assert_raises(TypeError, _clean_inputs, _LPProblem(c=None)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_ub=A_ub)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_ub=A_ub, b_ub=None)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, b_ub=b_ub)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_ub=None, b_ub=b_ub)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_eq=A_eq)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_eq=A_eq, b_eq=None)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, b_eq=b_eq)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_eq=None, b_eq=b_eq)) + + +def test_too_many_dimensions(): + cb = [1, 2, 3, 4] + A = np.random.rand(4, 4) + bad2D = [[1, 2], [3, 4]] + bad3D = np.random.rand(4, 4, 4) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=bad2D, A_ub=A, b_ub=cb)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_ub=bad3D, b_ub=cb)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_ub=A, b_ub=bad2D)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_eq=bad3D, b_eq=cb)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_eq=A, b_eq=bad2D)) + + +def test_too_few_dimensions(): + bad = np.random.rand(4, 4).ravel() + cb = np.random.rand(4) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_ub=bad, b_ub=cb)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=cb, A_eq=bad, b_eq=cb)) + + +def test_inconsistent_dimensions(): + m = 2 + n = 4 + c = [1, 2, 3, 4] + + Agood = np.random.rand(m, n) + Abad = np.random.rand(m, n + 1) + bgood = np.random.rand(m) + bbad = np.random.rand(m + 1) + boundsbad = [(0, 1)] * (n + 1) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_ub=Abad, b_ub=bgood)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_ub=Agood, b_ub=bbad)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_eq=Abad, b_eq=bgood)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, A_eq=Agood, b_eq=bbad)) + assert_raises(ValueError, _clean_inputs, _LPProblem(c=c, bounds=boundsbad)) + with np.testing.suppress_warnings() as sup: + sup.filter(VisibleDeprecationWarning, "Creating an ndarray from ragged") + assert_raises(ValueError, _clean_inputs, + _LPProblem(c=c, bounds=[[1, 2], [2, 3], [3, 4], [4, 5, 6]])) + + +def test_type_errors(): + lp = _LPProblem( + c=[1, 2], + A_ub=np.array([[1, 1], [2, 2]]), + b_ub=np.array([1, 1]), + A_eq=np.array([[1, 1], [2, 2]]), + b_eq=np.array([1, 1]), + bounds=[(0, 1)] + ) + bad = "hello" + + assert_raises(TypeError, 
_clean_inputs, lp._replace(c=bad)) + assert_raises(TypeError, _clean_inputs, lp._replace(A_ub=bad)) + assert_raises(TypeError, _clean_inputs, lp._replace(b_ub=bad)) + assert_raises(TypeError, _clean_inputs, lp._replace(A_eq=bad)) + assert_raises(TypeError, _clean_inputs, lp._replace(b_eq=bad)) + + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=bad)) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds="hi")) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=["hi"])) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=[("hi")])) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=[(1, "")])) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=[(1, 2), (1, "")])) + assert_raises(TypeError, _clean_inputs, + lp._replace(bounds=[(1, date(2020, 2, 29))])) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=[[[1, 2]]])) + + +def test_non_finite_errors(): + lp = _LPProblem( + c=[1, 2], + A_ub=np.array([[1, 1], [2, 2]]), + b_ub=np.array([1, 1]), + A_eq=np.array([[1, 1], [2, 2]]), + b_eq=np.array([1, 1]), + bounds=[(0, 1)] + ) + assert_raises(ValueError, _clean_inputs, lp._replace(c=[0, None])) + assert_raises(ValueError, _clean_inputs, lp._replace(c=[np.inf, 0])) + assert_raises(ValueError, _clean_inputs, lp._replace(c=[0, -np.inf])) + assert_raises(ValueError, _clean_inputs, lp._replace(c=[np.nan, 0])) + + assert_raises(ValueError, _clean_inputs, lp._replace(A_ub=[[1, 2], [None, 1]])) + assert_raises(ValueError, _clean_inputs, lp._replace(b_ub=[np.inf, 1])) + assert_raises(ValueError, _clean_inputs, lp._replace(A_eq=[[1, 2], [1, -np.inf]])) + assert_raises(ValueError, _clean_inputs, lp._replace(b_eq=[1, np.nan])) + + +def test__clean_inputs1(): + lp = _LPProblem( + c=[1, 2], + A_ub=[[1, 1], [2, 2]], + b_ub=[1, 1], + A_eq=[[1, 1], [2, 2]], + b_eq=[1, 1], + bounds=None + ) + + lp_cleaned = _clean_inputs(lp) + + assert_allclose(lp_cleaned.c, np.array(lp.c)) + assert_allclose(lp_cleaned.A_ub, np.array(lp.A_ub)) + assert_allclose(lp_cleaned.b_ub, np.array(lp.b_ub)) + assert_allclose(lp_cleaned.A_eq, np.array(lp.A_eq)) + assert_allclose(lp_cleaned.b_eq, np.array(lp.b_eq)) + assert_equal(lp_cleaned.bounds, [(0, np.inf)] * 2) + + assert_(lp_cleaned.c.shape == (2,), "") + assert_(lp_cleaned.A_ub.shape == (2, 2), "") + assert_(lp_cleaned.b_ub.shape == (2,), "") + assert_(lp_cleaned.A_eq.shape == (2, 2), "") + assert_(lp_cleaned.b_eq.shape == (2,), "") + + +def test__clean_inputs2(): + lp = _LPProblem( + c=1, + A_ub=[[1]], + b_ub=1, + A_eq=[[1]], + b_eq=1, + bounds=(0, 1) + ) + + lp_cleaned = _clean_inputs(lp) + + assert_allclose(lp_cleaned.c, np.array(lp.c)) + assert_allclose(lp_cleaned.A_ub, np.array(lp.A_ub)) + assert_allclose(lp_cleaned.b_ub, np.array(lp.b_ub)) + assert_allclose(lp_cleaned.A_eq, np.array(lp.A_eq)) + assert_allclose(lp_cleaned.b_eq, np.array(lp.b_eq)) + assert_equal(lp_cleaned.bounds, [(0, 1)]) + + assert_(lp_cleaned.c.shape == (1,), "") + assert_(lp_cleaned.A_ub.shape == (1, 1), "") + assert_(lp_cleaned.b_ub.shape == (1,), "") + assert_(lp_cleaned.A_eq.shape == (1, 1), "") + assert_(lp_cleaned.b_eq.shape == (1,), "") + + +def test__clean_inputs3(): + lp = _LPProblem( + c=[[1, 2]], + A_ub=np.random.rand(2, 2), + b_ub=[[1], [2]], + A_eq=np.random.rand(2, 2), + b_eq=[[1], [2]], + bounds=[(0, 1)] + ) + + lp_cleaned = _clean_inputs(lp) + + assert_allclose(lp_cleaned.c, np.array([1, 2])) + assert_allclose(lp_cleaned.b_ub, np.array([1, 2])) + assert_allclose(lp_cleaned.b_eq, np.array([1, 2])) + 
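+ # The row vector c=[[1, 2]] and the column vectors b_ub/b_eq have
+ # been flattened to 1-D, and the single (0, 1) bounds pair is
+ # broadcast across both variables (checked next):
+ assert lp_cleaned.c.ndim == lp_cleaned.b_ub.ndim == 1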
assert_equal(lp_cleaned.bounds, [(0, 1)] * 2) + + assert_(lp_cleaned.c.shape == (2,), "") + assert_(lp_cleaned.b_ub.shape == (2,), "") + assert_(lp_cleaned.b_eq.shape == (2,), "") + + +def test_bad_bounds(): + lp = _LPProblem(c=[1, 2]) + + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=(1, 2, 2))) + assert_raises(ValueError, _clean_inputs, lp._replace(bounds=[(1, 2, 2)])) + with np.testing.suppress_warnings() as sup: + sup.filter(VisibleDeprecationWarning, "Creating an ndarray from ragged") + assert_raises(ValueError, _clean_inputs, + lp._replace(bounds=[(1, 2), (1, 2, 2)])) + assert_raises(ValueError, _clean_inputs, + lp._replace(bounds=[(1, 2), (1, 2), (1, 2)])) + + lp = _LPProblem(c=[1, 2, 3, 4]) + + assert_raises(ValueError, _clean_inputs, + lp._replace(bounds=[(1, 2, 3, 4), (1, 2, 3, 4)])) + + +def test_good_bounds(): + lp = _LPProblem(c=[1, 2]) + + lp_cleaned = _clean_inputs(lp) # lp.bounds is None by default + assert_equal(lp_cleaned.bounds, [(0, np.inf)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[])) + assert_equal(lp_cleaned.bounds, [(0, np.inf)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[[]])) + assert_equal(lp_cleaned.bounds, [(0, np.inf)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=(1, 2))) + assert_equal(lp_cleaned.bounds, [(1, 2)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(1, 2)])) + assert_equal(lp_cleaned.bounds, [(1, 2)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(1, None)])) + assert_equal(lp_cleaned.bounds, [(1, np.inf)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(None, 1)])) + assert_equal(lp_cleaned.bounds, [(-np.inf, 1)] * 2) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(None, None), (-np.inf, None)])) + assert_equal(lp_cleaned.bounds, [(-np.inf, np.inf)] * 2) + + lp = _LPProblem(c=[1, 2, 3, 4]) + + lp_cleaned = _clean_inputs(lp) # lp.bounds is None by default + assert_equal(lp_cleaned.bounds, [(0, np.inf)] * 4) + + lp_cleaned = _clean_inputs(lp._replace(bounds=(1, 2))) + assert_equal(lp_cleaned.bounds, [(1, 2)] * 4) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(1, 2)])) + assert_equal(lp_cleaned.bounds, [(1, 2)] * 4) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(1, None)])) + assert_equal(lp_cleaned.bounds, [(1, np.inf)] * 4) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(None, 1)])) + assert_equal(lp_cleaned.bounds, [(-np.inf, 1)] * 4) + + lp_cleaned = _clean_inputs(lp._replace(bounds=[(None, None), + (-np.inf, None), + (None, np.inf), + (-np.inf, np.inf)])) + assert_equal(lp_cleaned.bounds, [(-np.inf, np.inf)] * 4) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__numdiff.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__numdiff.py new file mode 100644 index 0000000000000000000000000000000000000000..7f695d94569438233afd1c3b3d2db2e390654f01 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__numdiff.py @@ -0,0 +1,815 @@ +import math +from itertools import product + +import numpy as np +from numpy.testing import assert_allclose, assert_equal, assert_ +from pytest import raises as assert_raises + +from scipy.sparse import csr_matrix, csc_matrix, lil_matrix + +from scipy.optimize._numdiff import ( + _adjust_scheme_to_bounds, approx_derivative, check_derivative, + group_columns, _eps_for_method, _compute_absolute_step) + + +def test_group_columns(): + structure = [ 
+ [1, 1, 0, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0] + ] + for transform in [np.asarray, csr_matrix, csc_matrix, lil_matrix]: + A = transform(structure) + order = np.arange(6) + groups_true = np.array([0, 1, 2, 0, 1, 2]) + groups = group_columns(A, order) + assert_equal(groups, groups_true) + + order = [1, 2, 4, 3, 5, 0] + groups_true = np.array([2, 0, 1, 2, 0, 1]) + groups = group_columns(A, order) + assert_equal(groups, groups_true) + + # Test repeatability. + groups_1 = group_columns(A) + groups_2 = group_columns(A) + assert_equal(groups_1, groups_2) + + +def test_correct_fp_eps(): + # check that relative step size is correct for FP size + EPS = np.finfo(np.float64).eps + relative_step = {"2-point": EPS**0.5, + "3-point": EPS**(1/3), + "cs": EPS**0.5} + for method in ['2-point', '3-point', 'cs']: + assert_allclose( + _eps_for_method(np.float64, np.float64, method), + relative_step[method]) + assert_allclose( + _eps_for_method(np.complex128, np.complex128, method), + relative_step[method] + ) + + # check another FP size + EPS = np.finfo(np.float32).eps + relative_step = {"2-point": EPS**0.5, + "3-point": EPS**(1/3), + "cs": EPS**0.5} + + for method in ['2-point', '3-point', 'cs']: + assert_allclose( + _eps_for_method(np.float64, np.float32, method), + relative_step[method] + ) + assert_allclose( + _eps_for_method(np.float32, np.float64, method), + relative_step[method] + ) + assert_allclose( + _eps_for_method(np.float32, np.float32, method), + relative_step[method] + ) + + +class TestAdjustSchemeToBounds: + def test_no_bounds(self): + x0 = np.zeros(3) + h = np.full(3, 1e-2) + inf_lower = np.empty_like(x0) + inf_upper = np.empty_like(x0) + inf_lower.fill(-np.inf) + inf_upper.fill(np.inf) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 1, '1-sided', inf_lower, inf_upper) + assert_allclose(h_adjusted, h) + assert_(np.all(one_sided)) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 2, '1-sided', inf_lower, inf_upper) + assert_allclose(h_adjusted, h) + assert_(np.all(one_sided)) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 1, '2-sided', inf_lower, inf_upper) + assert_allclose(h_adjusted, h) + assert_(np.all(~one_sided)) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 2, '2-sided', inf_lower, inf_upper) + assert_allclose(h_adjusted, h) + assert_(np.all(~one_sided)) + + def test_with_bound(self): + x0 = np.array([0.0, 0.85, -0.85]) + lb = -np.ones(3) + ub = np.ones(3) + h = np.array([1, 1, -1]) * 1e-1 + + h_adjusted, _ = _adjust_scheme_to_bounds(x0, h, 1, '1-sided', lb, ub) + assert_allclose(h_adjusted, h) + + h_adjusted, _ = _adjust_scheme_to_bounds(x0, h, 2, '1-sided', lb, ub) + assert_allclose(h_adjusted, np.array([1, -1, 1]) * 1e-1) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 1, '2-sided', lb, ub) + assert_allclose(h_adjusted, np.abs(h)) + assert_(np.all(~one_sided)) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 2, '2-sided', lb, ub) + assert_allclose(h_adjusted, np.array([1, -1, 1]) * 1e-1) + assert_equal(one_sided, np.array([False, True, True])) + + def test_tight_bounds(self): + lb = np.array([-0.03, -0.03]) + ub = np.array([0.05, 0.05]) + x0 = np.array([0.0, 0.03]) + h = np.array([-0.1, -0.1]) + + h_adjusted, _ = _adjust_scheme_to_bounds(x0, h, 1, '1-sided', lb, ub) + assert_allclose(h_adjusted, np.array([0.05, -0.06])) + + h_adjusted, _ = _adjust_scheme_to_bounds(x0, h, 2, '1-sided', lb, ub) 
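+ # With an order-2 one-sided scheme both x0 + h and x0 + 2*h must stay
+ # inside the bounds, so each step is capped at half the available gap:
+ # x0[0] = 0 has 0.05 of room toward ub, giving 0.025, while
+ # x0[1] = 0.03 has 0.06 of room toward lb, giving -0.03.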
+ assert_allclose(h_adjusted, np.array([0.025, -0.03])) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 1, '2-sided', lb, ub) + assert_allclose(h_adjusted, np.array([0.03, -0.03])) + assert_equal(one_sided, np.array([False, True])) + + h_adjusted, one_sided = _adjust_scheme_to_bounds( + x0, h, 2, '2-sided', lb, ub) + assert_allclose(h_adjusted, np.array([0.015, -0.015])) + assert_equal(one_sided, np.array([False, True])) + + +class TestApproxDerivativesDense: + def fun_scalar_scalar(self, x): + return np.sinh(x) + + def jac_scalar_scalar(self, x): + return np.cosh(x) + + def fun_scalar_vector(self, x): + return np.array([x[0]**2, np.tan(x[0]), np.exp(x[0])]) + + def jac_scalar_vector(self, x): + return np.array( + [2 * x[0], np.cos(x[0]) ** -2, np.exp(x[0])]).reshape(-1, 1) + + def fun_vector_scalar(self, x): + return np.sin(x[0] * x[1]) * np.log(x[0]) + + def wrong_dimensions_fun(self, x): + return np.array([x**2, np.tan(x), np.exp(x)]) + + def jac_vector_scalar(self, x): + return np.array([ + x[1] * np.cos(x[0] * x[1]) * np.log(x[0]) + + np.sin(x[0] * x[1]) / x[0], + x[0] * np.cos(x[0] * x[1]) * np.log(x[0]) + ]) + + def fun_vector_vector(self, x): + return np.array([ + x[0] * np.sin(x[1]), + x[1] * np.cos(x[0]), + x[0] ** 3 * x[1] ** -0.5 + ]) + + def jac_vector_vector(self, x): + return np.array([ + [np.sin(x[1]), x[0] * np.cos(x[1])], + [-x[1] * np.sin(x[0]), np.cos(x[0])], + [3 * x[0] ** 2 * x[1] ** -0.5, -0.5 * x[0] ** 3 * x[1] ** -1.5] + ]) + + def fun_parametrized(self, x, c0, c1=1.0): + return np.array([np.exp(c0 * x[0]), np.exp(c1 * x[1])]) + + def jac_parametrized(self, x, c0, c1=0.1): + return np.array([ + [c0 * np.exp(c0 * x[0]), 0], + [0, c1 * np.exp(c1 * x[1])] + ]) + + def fun_with_nan(self, x): + return x if np.abs(x) <= 1e-8 else np.nan + + def jac_with_nan(self, x): + return 1.0 if np.abs(x) <= 1e-8 else np.nan + + def fun_zero_jacobian(self, x): + return np.array([x[0] * x[1], np.cos(x[0] * x[1])]) + + def jac_zero_jacobian(self, x): + return np.array([ + [x[1], x[0]], + [-x[1] * np.sin(x[0] * x[1]), -x[0] * np.sin(x[0] * x[1])] + ]) + + def jac_non_numpy(self, x): + # x can be a scalar or an array [val]. + # Cast to true scalar before handing over to math.exp + xp = np.asarray(x).item() + return math.exp(xp) + + def test_scalar_scalar(self): + x0 = 1.0 + jac_diff_2 = approx_derivative(self.fun_scalar_scalar, x0, + method='2-point') + jac_diff_3 = approx_derivative(self.fun_scalar_scalar, x0) + jac_diff_4 = approx_derivative(self.fun_scalar_scalar, x0, + method='cs') + jac_true = self.jac_scalar_scalar(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_scalar_scalar_abs_step(self): + # can approx_derivative use abs_step? 
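+ # 1.49e-8 is roughly sqrt(float64 eps), the classical near-optimal
+ # forward-difference step, so the absolute-step results should match
+ # the default relative-step accuracy of the previous test.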
+ x0 = 1.0 + jac_diff_2 = approx_derivative(self.fun_scalar_scalar, x0, + method='2-point', abs_step=1.49e-8) + jac_diff_3 = approx_derivative(self.fun_scalar_scalar, x0, + abs_step=1.49e-8) + jac_diff_4 = approx_derivative(self.fun_scalar_scalar, x0, + method='cs', abs_step=1.49e-8) + jac_true = self.jac_scalar_scalar(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_scalar_vector(self): + x0 = 0.5 + jac_diff_2 = approx_derivative(self.fun_scalar_vector, x0, + method='2-point') + jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0) + jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, + method='cs') + jac_true = self.jac_scalar_vector(np.atleast_1d(x0)) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_vector_scalar(self): + x0 = np.array([100.0, -0.5]) + jac_diff_2 = approx_derivative(self.fun_vector_scalar, x0, + method='2-point') + jac_diff_3 = approx_derivative(self.fun_vector_scalar, x0) + jac_diff_4 = approx_derivative(self.fun_vector_scalar, x0, + method='cs') + jac_true = self.jac_vector_scalar(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-7) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_vector_scalar_abs_step(self): + # can approx_derivative use abs_step? + x0 = np.array([100.0, -0.5]) + jac_diff_2 = approx_derivative(self.fun_vector_scalar, x0, + method='2-point', abs_step=1.49e-8) + jac_diff_3 = approx_derivative(self.fun_vector_scalar, x0, + abs_step=1.49e-8, rel_step=np.inf) + jac_diff_4 = approx_derivative(self.fun_vector_scalar, x0, + method='cs', abs_step=1.49e-8) + jac_true = self.jac_vector_scalar(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=3e-9) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_vector_vector(self): + x0 = np.array([-100.0, 0.2]) + jac_diff_2 = approx_derivative(self.fun_vector_vector, x0, + method='2-point') + jac_diff_3 = approx_derivative(self.fun_vector_vector, x0) + jac_diff_4 = approx_derivative(self.fun_vector_vector, x0, + method='cs') + jac_true = self.jac_vector_vector(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-5) + assert_allclose(jac_diff_3, jac_true, rtol=1e-6) + assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + + def test_wrong_dimensions(self): + x0 = 1.0 + assert_raises(RuntimeError, approx_derivative, + self.wrong_dimensions_fun, x0) + f0 = self.wrong_dimensions_fun(np.atleast_1d(x0)) + assert_raises(ValueError, approx_derivative, + self.wrong_dimensions_fun, x0, f0=f0) + + def test_custom_rel_step(self): + x0 = np.array([-0.1, 0.1]) + jac_diff_2 = approx_derivative(self.fun_vector_vector, x0, + method='2-point', rel_step=1e-4) + jac_diff_3 = approx_derivative(self.fun_vector_vector, x0, + rel_step=1e-4) + jac_true = self.jac_vector_vector(x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-2) + assert_allclose(jac_diff_3, jac_true, rtol=1e-4) + + def test_options(self): + x0 = np.array([1.0, 1.0]) + c0 = -1.0 + c1 = 1.0 + lb = 0.0 + ub = 2.0 + f0 = self.fun_parametrized(x0, c0, c1=c1) + rel_step = np.array([-1e-6, 1e-7]) + jac_true = self.jac_parametrized(x0, c0, c1) + jac_diff_2 = approx_derivative( + self.fun_parametrized, x0, method='2-point', rel_step=rel_step, + f0=f0, args=(c0,), kwargs=dict(c1=c1), bounds=(lb, ub)) + jac_diff_3 
= approx_derivative( + self.fun_parametrized, x0, rel_step=rel_step, + f0=f0, args=(c0,), kwargs=dict(c1=c1), bounds=(lb, ub)) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + + def test_with_bounds_2_point(self): + lb = -np.ones(2) + ub = np.ones(2) + + x0 = np.array([-2.0, 0.2]) + assert_raises(ValueError, approx_derivative, + self.fun_vector_vector, x0, bounds=(lb, ub)) + + x0 = np.array([-1.0, 1.0]) + jac_diff = approx_derivative(self.fun_vector_vector, x0, + method='2-point', bounds=(lb, ub)) + jac_true = self.jac_vector_vector(x0) + assert_allclose(jac_diff, jac_true, rtol=1e-6) + + def test_with_bounds_3_point(self): + lb = np.array([1.0, 1.0]) + ub = np.array([2.0, 2.0]) + + x0 = np.array([1.0, 2.0]) + jac_true = self.jac_vector_vector(x0) + + jac_diff = approx_derivative(self.fun_vector_vector, x0) + assert_allclose(jac_diff, jac_true, rtol=1e-9) + + jac_diff = approx_derivative(self.fun_vector_vector, x0, + bounds=(lb, np.inf)) + assert_allclose(jac_diff, jac_true, rtol=1e-9) + + jac_diff = approx_derivative(self.fun_vector_vector, x0, + bounds=(-np.inf, ub)) + assert_allclose(jac_diff, jac_true, rtol=1e-9) + + jac_diff = approx_derivative(self.fun_vector_vector, x0, + bounds=(lb, ub)) + assert_allclose(jac_diff, jac_true, rtol=1e-9) + + def test_tight_bounds(self): + x0 = np.array([10.0, 10.0]) + lb = x0 - 3e-9 + ub = x0 + 2e-9 + jac_true = self.jac_vector_vector(x0) + jac_diff = approx_derivative( + self.fun_vector_vector, x0, method='2-point', bounds=(lb, ub)) + assert_allclose(jac_diff, jac_true, rtol=1e-6) + jac_diff = approx_derivative( + self.fun_vector_vector, x0, method='2-point', + rel_step=1e-6, bounds=(lb, ub)) + assert_allclose(jac_diff, jac_true, rtol=1e-6) + + jac_diff = approx_derivative( + self.fun_vector_vector, x0, bounds=(lb, ub)) + assert_allclose(jac_diff, jac_true, rtol=1e-6) + jac_diff = approx_derivative( + self.fun_vector_vector, x0, rel_step=1e-6, bounds=(lb, ub)) + assert_allclose(jac_true, jac_diff, rtol=1e-6) + + def test_bound_switches(self): + lb = -1e-8 + ub = 1e-8 + x0 = 0.0 + jac_true = self.jac_with_nan(x0) + jac_diff_2 = approx_derivative( + self.fun_with_nan, x0, method='2-point', rel_step=1e-6, + bounds=(lb, ub)) + jac_diff_3 = approx_derivative( + self.fun_with_nan, x0, rel_step=1e-6, bounds=(lb, ub)) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + + x0 = 1e-8 + jac_true = self.jac_with_nan(x0) + jac_diff_2 = approx_derivative( + self.fun_with_nan, x0, method='2-point', rel_step=1e-6, + bounds=(lb, ub)) + jac_diff_3 = approx_derivative( + self.fun_with_nan, x0, rel_step=1e-6, bounds=(lb, ub)) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-9) + + def test_non_numpy(self): + x0 = 1.0 + jac_true = self.jac_non_numpy(x0) + jac_diff_2 = approx_derivative(self.jac_non_numpy, x0, + method='2-point') + jac_diff_3 = approx_derivative(self.jac_non_numpy, x0) + assert_allclose(jac_diff_2, jac_true, rtol=1e-6) + assert_allclose(jac_diff_3, jac_true, rtol=1e-8) + + # math.exp cannot handle complex arguments, hence this raises + assert_raises(TypeError, approx_derivative, self.jac_non_numpy, x0, + **dict(method='cs')) + + def test_fp(self): + # checks that approx_derivative works for FP size other than 64. + # Example is derived from the minimal working example in gh12991. 
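+ # As test_correct_fp_eps established, the relative step must be sized
+ # from the lower-precision dtype involved; mixing float32 and float64
+ # below therefore only supports the looser atol=1e-3 checks.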
+ np.random.seed(1) + + def func(p, x): + return p[0] + p[1] * x + + def err(p, x, y): + return func(p, x) - y + + x = np.linspace(0, 1, 100, dtype=np.float64) + y = np.random.random(100).astype(np.float64) + p0 = np.array([-1.0, -1.0]) + + jac_fp64 = approx_derivative(err, p0, method='2-point', args=(x, y)) + + # parameter vector is float32, func output is float64 + jac_fp = approx_derivative(err, p0.astype(np.float32), + method='2-point', args=(x, y)) + assert err(p0, x, y).dtype == np.float64 + assert_allclose(jac_fp, jac_fp64, atol=1e-3) + + # parameter vector is float64, func output is float32 + def err_fp32(p): + assert p.dtype == np.float32 + return err(p, x, y).astype(np.float32) + + jac_fp = approx_derivative(err_fp32, p0.astype(np.float32), + method='2-point') + assert_allclose(jac_fp, jac_fp64, atol=1e-3) + + # check upper bound of error on the derivative for 2-point + def f(x): + return np.sin(x) + def g(x): + return np.cos(x) + def hess(x): + return -np.sin(x) + + def calc_atol(h, x0, f, hess, EPS): + # truncation error + t0 = h / 2 * max(np.abs(hess(x0)), np.abs(hess(x0 + h))) + # roundoff error. There may be a divisor (>1) missing from + # the following line, so this contribution is possibly + # overestimated + t1 = EPS / h * max(np.abs(f(x0)), np.abs(f(x0 + h))) + return t0 + t1 + + for dtype in [np.float16, np.float32, np.float64]: + EPS = np.finfo(dtype).eps + x0 = np.array(1.0).astype(dtype) + h = _compute_absolute_step(None, x0, f(x0), '2-point') + atol = calc_atol(h, x0, f, hess, EPS) + err = approx_derivative(f, x0, method='2-point', + abs_step=h) - g(x0) + assert abs(err) < atol + + def test_check_derivative(self): + x0 = np.array([-10.0, 10]) + accuracy = check_derivative(self.fun_vector_vector, + self.jac_vector_vector, x0) + assert_(accuracy < 1e-9) + accuracy = check_derivative(self.fun_vector_vector, + self.jac_vector_vector, x0) + assert_(accuracy < 1e-6) + + x0 = np.array([0.0, 0.0]) + accuracy = check_derivative(self.fun_zero_jacobian, + self.jac_zero_jacobian, x0) + assert_(accuracy == 0) + accuracy = check_derivative(self.fun_zero_jacobian, + self.jac_zero_jacobian, x0) + assert_(accuracy == 0) + + +class TestApproxDerivativeSparse: + # Example from Numerical Optimization 2nd edition, p. 198. 
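+ # The Jacobian below is tridiagonal, so columns {i, i + 3, i + 6, ...}
+ # never share a row; group_columns can colour the 50 columns into 3
+ # groups, letting approx_derivative recover the whole Jacobian from
+ # roughly 3 perturbed evaluations instead of 50.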
+ def setup_method(self): + np.random.seed(0) + self.n = 50 + self.lb = -0.1 * (1 + np.arange(self.n)) + self.ub = 0.1 * (1 + np.arange(self.n)) + self.x0 = np.empty(self.n) + self.x0[::2] = (1 - 1e-7) * self.lb[::2] + self.x0[1::2] = (1 - 1e-7) * self.ub[1::2] + + self.J_true = self.jac(self.x0) + + def fun(self, x): + e = x[1:]**3 - x[:-1]**2 + return np.hstack((0, 3 * e)) + np.hstack((2 * e, 0)) + + def jac(self, x): + n = x.size + J = np.zeros((n, n)) + J[0, 0] = -4 * x[0] + J[0, 1] = 6 * x[1]**2 + for i in range(1, n - 1): + J[i, i - 1] = -6 * x[i-1] + J[i, i] = 9 * x[i]**2 - 4 * x[i] + J[i, i + 1] = 6 * x[i+1]**2 + J[-1, -1] = 9 * x[-1]**2 + J[-1, -2] = -6 * x[-2] + + return J + + def structure(self, n): + A = np.zeros((n, n), dtype=int) + A[0, 0] = 1 + A[0, 1] = 1 + for i in range(1, n - 1): + A[i, i - 1: i + 2] = 1 + A[-1, -1] = 1 + A[-1, -2] = 1 + + return A + + def test_all(self): + A = self.structure(self.n) + order = np.arange(self.n) + groups_1 = group_columns(A, order) + np.random.shuffle(order) + groups_2 = group_columns(A, order) + + for method, groups, l, u in product( + ['2-point', '3-point', 'cs'], [groups_1, groups_2], + [-np.inf, self.lb], [np.inf, self.ub]): + J = approx_derivative(self.fun, self.x0, method=method, + bounds=(l, u), sparsity=(A, groups)) + assert_(isinstance(J, csr_matrix)) + assert_allclose(J.toarray(), self.J_true, rtol=1e-6) + + rel_step = np.full_like(self.x0, 1e-8) + rel_step[::2] *= -1 + J = approx_derivative(self.fun, self.x0, method=method, + rel_step=rel_step, sparsity=(A, groups)) + assert_allclose(J.toarray(), self.J_true, rtol=1e-5) + + def test_no_precomputed_groups(self): + A = self.structure(self.n) + J = approx_derivative(self.fun, self.x0, sparsity=A) + assert_allclose(J.toarray(), self.J_true, rtol=1e-6) + + def test_equivalence(self): + structure = np.ones((self.n, self.n), dtype=int) + groups = np.arange(self.n) + for method in ['2-point', '3-point', 'cs']: + J_dense = approx_derivative(self.fun, self.x0, method=method) + J_sparse = approx_derivative( + self.fun, self.x0, sparsity=(structure, groups), method=method) + assert_allclose(J_dense, J_sparse.toarray(), + rtol=5e-16, atol=7e-15) + + def test_check_derivative(self): + def jac(x): + return csr_matrix(self.jac(x)) + + accuracy = check_derivative(self.fun, jac, self.x0, + bounds=(self.lb, self.ub)) + assert_(accuracy < 1e-9) + + accuracy = check_derivative(self.fun, jac, self.x0, + bounds=(self.lb, self.ub)) + assert_(accuracy < 1e-9) + + +class TestApproxDerivativeLinearOperator: + + def fun_scalar_scalar(self, x): + return np.sinh(x) + + def jac_scalar_scalar(self, x): + return np.cosh(x) + + def fun_scalar_vector(self, x): + return np.array([x[0]**2, np.tan(x[0]), np.exp(x[0])]) + + def jac_scalar_vector(self, x): + return np.array( + [2 * x[0], np.cos(x[0]) ** -2, np.exp(x[0])]).reshape(-1, 1) + + def fun_vector_scalar(self, x): + return np.sin(x[0] * x[1]) * np.log(x[0]) + + def jac_vector_scalar(self, x): + return np.array([ + x[1] * np.cos(x[0] * x[1]) * np.log(x[0]) + + np.sin(x[0] * x[1]) / x[0], + x[0] * np.cos(x[0] * x[1]) * np.log(x[0]) + ]) + + def fun_vector_vector(self, x): + return np.array([ + x[0] * np.sin(x[1]), + x[1] * np.cos(x[0]), + x[0] ** 3 * x[1] ** -0.5 + ]) + + def jac_vector_vector(self, x): + return np.array([ + [np.sin(x[1]), x[0] * np.cos(x[1])], + [-x[1] * np.sin(x[0]), np.cos(x[0])], + [3 * x[0] ** 2 * x[1] ** -0.5, -0.5 * x[0] ** 3 * x[1] ** -1.5] + ]) + + def test_scalar_scalar(self): + x0 = 1.0 + jac_diff_2 = 
approx_derivative(self.fun_scalar_scalar, x0, + method='2-point', + as_linear_operator=True) + jac_diff_3 = approx_derivative(self.fun_scalar_scalar, x0, + as_linear_operator=True) + jac_diff_4 = approx_derivative(self.fun_scalar_scalar, x0, + method='cs', + as_linear_operator=True) + jac_true = self.jac_scalar_scalar(x0) + np.random.seed(1) + for i in range(10): + p = np.random.uniform(-10, 10, size=(1,)) + assert_allclose(jac_diff_2.dot(p), jac_true*p, + rtol=1e-5) + assert_allclose(jac_diff_3.dot(p), jac_true*p, + rtol=5e-6) + assert_allclose(jac_diff_4.dot(p), jac_true*p, + rtol=5e-6) + + def test_scalar_vector(self): + x0 = 0.5 + jac_diff_2 = approx_derivative(self.fun_scalar_vector, x0, + method='2-point', + as_linear_operator=True) + jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0, + as_linear_operator=True) + jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, + method='cs', + as_linear_operator=True) + jac_true = self.jac_scalar_vector(np.atleast_1d(x0)) + np.random.seed(1) + for i in range(10): + p = np.random.uniform(-10, 10, size=(1,)) + assert_allclose(jac_diff_2.dot(p), jac_true.dot(p), + rtol=1e-5) + assert_allclose(jac_diff_3.dot(p), jac_true.dot(p), + rtol=5e-6) + assert_allclose(jac_diff_4.dot(p), jac_true.dot(p), + rtol=5e-6) + + def test_vector_scalar(self): + x0 = np.array([100.0, -0.5]) + jac_diff_2 = approx_derivative(self.fun_vector_scalar, x0, + method='2-point', + as_linear_operator=True) + jac_diff_3 = approx_derivative(self.fun_vector_scalar, x0, + as_linear_operator=True) + jac_diff_4 = approx_derivative(self.fun_vector_scalar, x0, + method='cs', + as_linear_operator=True) + jac_true = self.jac_vector_scalar(x0) + np.random.seed(1) + for i in range(10): + p = np.random.uniform(-10, 10, size=x0.shape) + assert_allclose(jac_diff_2.dot(p), np.atleast_1d(jac_true.dot(p)), + rtol=1e-5) + assert_allclose(jac_diff_3.dot(p), np.atleast_1d(jac_true.dot(p)), + rtol=5e-6) + assert_allclose(jac_diff_4.dot(p), np.atleast_1d(jac_true.dot(p)), + rtol=1e-7) + + def test_vector_vector(self): + x0 = np.array([-100.0, 0.2]) + jac_diff_2 = approx_derivative(self.fun_vector_vector, x0, + method='2-point', + as_linear_operator=True) + jac_diff_3 = approx_derivative(self.fun_vector_vector, x0, + as_linear_operator=True) + jac_diff_4 = approx_derivative(self.fun_vector_vector, x0, + method='cs', + as_linear_operator=True) + jac_true = self.jac_vector_vector(x0) + np.random.seed(1) + for i in range(10): + p = np.random.uniform(-10, 10, size=x0.shape) + assert_allclose(jac_diff_2.dot(p), jac_true.dot(p), rtol=1e-5) + assert_allclose(jac_diff_3.dot(p), jac_true.dot(p), rtol=1e-6) + assert_allclose(jac_diff_4.dot(p), jac_true.dot(p), rtol=1e-7) + + def test_exception(self): + x0 = np.array([-100.0, 0.2]) + assert_raises(ValueError, approx_derivative, + self.fun_vector_vector, x0, + method='2-point', bounds=(1, np.inf)) + + +def test_absolute_step_sign(): + # test for gh12487 + # if an absolute step is specified for 2-point differences make sure that + # the side corresponds to the step. i.e. if step is positive then forward + # differences should be used, if step is negative then backwards + # differences should be used. 
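+ # At the kink a forward step samples the right-hand slopes of each
+ # component (-1 for the first, +1 for the second) while a backward
+ # step samples the left-hand slopes with flipped signs, which is what
+ # the asserts below encode.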
+ + # function has double discontinuity at x = [-1, -1] + # first component is \/, second component is /\ + def f(x): + return -np.abs(x[0] + 1) + np.abs(x[1] + 1) + + # check that the forward difference is used + grad = approx_derivative(f, [-1, -1], method='2-point', abs_step=1e-8) + assert_allclose(grad, [-1.0, 1.0]) + + # check that the backwards difference is used + grad = approx_derivative(f, [-1, -1], method='2-point', abs_step=-1e-8) + assert_allclose(grad, [1.0, -1.0]) + + # check that the forwards difference is used with a step for both + # parameters + grad = approx_derivative( + f, [-1, -1], method='2-point', abs_step=[1e-8, 1e-8] + ) + assert_allclose(grad, [-1.0, 1.0]) + + # check that we can mix forward/backwards steps. + grad = approx_derivative( + f, [-1, -1], method='2-point', abs_step=[1e-8, -1e-8] + ) + assert_allclose(grad, [-1.0, -1.0]) + grad = approx_derivative( + f, [-1, -1], method='2-point', abs_step=[-1e-8, 1e-8] + ) + assert_allclose(grad, [1.0, 1.0]) + + # the forward step should reverse to a backwards step if it runs into a + # bound + # This is kind of tested in TestAdjustSchemeToBounds, but only for a lower level + # function. + grad = approx_derivative( + f, [-1, -1], method='2-point', abs_step=1e-8, + bounds=(-np.inf, -1) + ) + assert_allclose(grad, [1.0, -1.0]) + + grad = approx_derivative( + f, [-1, -1], method='2-point', abs_step=-1e-8, bounds=(-1, np.inf) + ) + assert_allclose(grad, [-1.0, 1.0]) + + +def test__compute_absolute_step(): + # tests calculation of absolute step from rel_step + methods = ['2-point', '3-point', 'cs'] + + x0 = np.array([1e-5, 0, 1, 1e5]) + + EPS = np.finfo(np.float64).eps + relative_step = { + "2-point": EPS**0.5, + "3-point": EPS**(1/3), + "cs": EPS**0.5 + } + f0 = np.array(1.0) + + for method in methods: + rel_step = relative_step[method] + correct_step = np.array([rel_step, + rel_step * 1., + rel_step * 1., + rel_step * np.abs(x0[3])]) + + abs_step = _compute_absolute_step(None, x0, f0, method) + assert_allclose(abs_step, correct_step) + + sign_x0 = (-x0 >= 0).astype(float) * 2 - 1 + abs_step = _compute_absolute_step(None, -x0, f0, method) + assert_allclose(abs_step, sign_x0 * correct_step) + + # if a relative step is provided it should be used + rel_step = np.array([0.1, 1, 10, 100]) + correct_step = np.array([rel_step[0] * x0[0], + relative_step['2-point'], + rel_step[2] * 1., + rel_step[3] * np.abs(x0[3])]) + + abs_step = _compute_absolute_step(rel_step, x0, f0, '2-point') + assert_allclose(abs_step, correct_step) + + sign_x0 = (-x0 >= 0).astype(float) * 2 - 1 + abs_step = _compute_absolute_step(rel_step, -x0, f0, '2-point') + assert_allclose(abs_step, sign_x0 * correct_step) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__remove_redundancy.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__remove_redundancy.py new file mode 100644 index 0000000000000000000000000000000000000000..817282011699dea333042a4173f65c999a2925fc --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__remove_redundancy.py @@ -0,0 +1,228 @@ +""" +Unit test for Linear Programming via Simplex Algorithm. 
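+ (These tests actually exercise the redundancy-removal helpers used by
+ linprog: given A x = b they must drop linearly dependent rows of A,
+ or report status 2 when a dependency makes the system inconsistent.)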
+""" + +# TODO: add tests for: +# https://github.com/scipy/scipy/issues/5400 +# https://github.com/scipy/scipy/issues/6690 + +import numpy as np +from numpy.testing import ( + assert_, + assert_allclose, + assert_equal) + +from .test_linprog import magic_square +from scipy.optimize._remove_redundancy import _remove_redundancy_svd +from scipy.optimize._remove_redundancy import _remove_redundancy_pivot_dense +from scipy.optimize._remove_redundancy import _remove_redundancy_pivot_sparse +from scipy.optimize._remove_redundancy import _remove_redundancy_id + +from scipy.sparse import csc_matrix + + +def setup_module(): + np.random.seed(2017) + + +def redundancy_removed(A, B): + """Checks whether a matrix contains only independent rows of another""" + for rowA in A: + # `rowA in B` is not a reliable check + for rowB in B: + if np.all(rowA == rowB): + break + else: + return False + return A.shape[0] == np.linalg.matrix_rank(A) == np.linalg.matrix_rank(B) + + +class RRCommonTests: + def test_no_redundancy(self): + m, n = 10, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + A1, b1, status, message = self.rr(A0, b0) + assert_allclose(A0, A1) + assert_allclose(b0, b1) + assert_equal(status, 0) + + def test_infeasible_zero_row(self): + A = np.eye(3) + A[1, :] = 0 + b = np.random.rand(3) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 2) + + def test_remove_zero_row(self): + A = np.eye(3) + A[1, :] = 0 + b = np.random.rand(3) + b[1] = 0 + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_allclose(A1, A[[0, 2], :]) + assert_allclose(b1, b[[0, 2]]) + + def test_infeasible_m_gt_n(self): + m, n = 20, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 2) + + def test_infeasible_m_eq_n(self): + m, n = 10, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + A0[-1, :] = 2 * A0[-2, :] + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 2) + + def test_infeasible_m_lt_n(self): + m, n = 9, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + A0[-1, :] = np.arange(m - 1).dot(A0[:-1]) + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 2) + + def test_m_gt_n(self): + np.random.seed(2032) + m, n = 20, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + x = np.linalg.solve(A0[:n, :], b0[:n]) + b0[n:] = A0[n:, :].dot(x) + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 0) + assert_equal(A1.shape[0], n) + assert_equal(np.linalg.matrix_rank(A1), n) + + def test_m_gt_n_rank_deficient(self): + m, n = 20, 10 + A0 = np.zeros((m, n)) + A0[:, 0] = 1 + b0 = np.ones(m) + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 0) + assert_allclose(A1, A0[0:1, :]) + assert_allclose(b1, b0[0]) + + def test_m_lt_n_rank_deficient(self): + m, n = 9, 10 + A0 = np.random.rand(m, n) + b0 = np.random.rand(m) + A0[-1, :] = np.arange(m - 1).dot(A0[:-1]) + b0[-1] = np.arange(m - 1).dot(b0[:-1]) + A1, b1, status, message = self.rr(A0, b0) + assert_equal(status, 0) + assert_equal(A1.shape[0], 8) + assert_equal(np.linalg.matrix_rank(A1), 8) + + def test_dense1(self): + A = np.ones((6, 6)) + A[0, :3] = 0 + A[1, 3:] = 0 + A[3:, ::2] = -1 + A[3, :2] = 0 + A[4, 2:] = 0 + b = np.zeros(A.shape[0]) + + A1, b1, status, message = self.rr(A, b) + assert_(redundancy_removed(A1, A)) + assert_equal(status, 0) + + def test_dense2(self): + A = np.eye(6) + A[-2, -1] = 1 + A[-1, :] = 1 + b = np.zeros(A.shape[0]) + A1, b1, status, message = self.rr(A, b) + 
assert_(redundancy_removed(A1, A)) + assert_equal(status, 0) + + def test_dense3(self): + A = np.eye(6) + A[-2, -1] = 1 + A[-1, :] = 1 + b = np.random.rand(A.shape[0]) + b[-1] = np.sum(b[:-1]) + A1, b1, status, message = self.rr(A, b) + assert_(redundancy_removed(A1, A)) + assert_equal(status, 0) + + def test_m_gt_n_sparse(self): + np.random.seed(2013) + m, n = 20, 5 + p = 0.1 + A = np.random.rand(m, n) + A[np.random.rand(m, n) > p] = 0 + rank = np.linalg.matrix_rank(A) + b = np.zeros(A.shape[0]) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_equal(A1.shape[0], rank) + assert_equal(np.linalg.matrix_rank(A1), rank) + + def test_m_lt_n_sparse(self): + np.random.seed(2017) + m, n = 20, 50 + p = 0.05 + A = np.random.rand(m, n) + A[np.random.rand(m, n) > p] = 0 + rank = np.linalg.matrix_rank(A) + b = np.zeros(A.shape[0]) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_equal(A1.shape[0], rank) + assert_equal(np.linalg.matrix_rank(A1), rank) + + def test_m_eq_n_sparse(self): + np.random.seed(2017) + m, n = 100, 100 + p = 0.01 + A = np.random.rand(m, n) + A[np.random.rand(m, n) > p] = 0 + rank = np.linalg.matrix_rank(A) + b = np.zeros(A.shape[0]) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_equal(A1.shape[0], rank) + assert_equal(np.linalg.matrix_rank(A1), rank) + + def test_magic_square(self): + A, b, c, numbers, _ = magic_square(3) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_equal(A1.shape[0], 23) + assert_equal(np.linalg.matrix_rank(A1), 23) + + def test_magic_square2(self): + A, b, c, numbers, _ = magic_square(4) + A1, b1, status, message = self.rr(A, b) + assert_equal(status, 0) + assert_equal(A1.shape[0], 39) + assert_equal(np.linalg.matrix_rank(A1), 39) + + +class TestRRSVD(RRCommonTests): + def rr(self, A, b): + return _remove_redundancy_svd(A, b) + + +class TestRRPivotDense(RRCommonTests): + def rr(self, A, b): + return _remove_redundancy_pivot_dense(A, b) + + +class TestRRID(RRCommonTests): + def rr(self, A, b): + return _remove_redundancy_id(A, b) + + +class TestRRPivotSparse(RRCommonTests): + def rr(self, A, b): + rr_res = _remove_redundancy_pivot_sparse(csc_matrix(A), b) + A1, b1, status, message = rr_res + return A1.toarray(), b1, status, message diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__root.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__root.py new file mode 100644 index 0000000000000000000000000000000000000000..3827651a8e513f9543a2b0af3fbbc49cd82915a1 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__root.py @@ -0,0 +1,123 @@ +""" +Unit tests for optimization routines from _root.py. 
+""" +from numpy.testing import assert_, assert_equal +import pytest +from pytest import raises as assert_raises, warns as assert_warns +import numpy as np + +from scipy.optimize import root + + +class TestRoot: + def test_tol_parameter(self): + # Check that the minimize() tol= argument does something + def func(z): + x, y = z + return np.array([x**3 - 1, y**3 - 1]) + + def dfunc(z): + x, y = z + return np.array([[3*x**2, 0], [0, 3*y**2]]) + + for method in ['hybr', 'lm', 'broyden1', 'broyden2', 'anderson', + 'diagbroyden', 'krylov']: + if method in ('linearmixing', 'excitingmixing'): + # doesn't converge + continue + + if method in ('hybr', 'lm'): + jac = dfunc + else: + jac = None + + sol1 = root(func, [1.1,1.1], jac=jac, tol=1e-4, method=method) + sol2 = root(func, [1.1,1.1], jac=jac, tol=0.5, method=method) + msg = f"{method}: {func(sol1.x)} vs. {func(sol2.x)}" + assert_(sol1.success, msg) + assert_(sol2.success, msg) + assert_(abs(func(sol1.x)).max() < abs(func(sol2.x)).max(), + msg) + + def test_tol_norm(self): + + def norm(x): + return abs(x[0]) + + for method in ['excitingmixing', + 'diagbroyden', + 'linearmixing', + 'anderson', + 'broyden1', + 'broyden2', + 'krylov']: + + root(np.zeros_like, np.zeros(2), method=method, + options={"tol_norm": norm}) + + def test_minimize_scalar_coerce_args_param(self): + # github issue #3503 + def func(z, f=1): + x, y = z + return np.array([x**3 - 1, y**3 - f]) + root(func, [1.1, 1.1], args=1.5) + + def test_f_size(self): + # gh8320 + # check that decreasing the size of the returned array raises an error + # and doesn't segfault + class fun: + def __init__(self): + self.count = 0 + + def __call__(self, x): + self.count += 1 + + if not (self.count % 5): + ret = x[0] + 0.5 * (x[0] - x[1]) ** 3 - 1.0 + else: + ret = ([x[0] + 0.5 * (x[0] - x[1]) ** 3 - 1.0, + 0.5 * (x[1] - x[0]) ** 3 + x[1]]) + + return ret + + F = fun() + with assert_raises(ValueError): + root(F, [0.1, 0.0], method='lm') + + def test_gh_10370(self): + # gh-10370 reported that passing both `args` and `jac` to `root` with + # `method='krylov'` caused a failure. Ensure that this is fixed whether + # the gradient is passed via `jac` or as a second output of `fun`. 
+ def fun(x, ignored): + return [3*x[0] - 0.25*x[1]**2 + 10, 0.1*x[0]**2 + 5*x[1] - 2] + + def grad(x, ignored): + return [[3, 0.5 * x[1]], [0.2 * x[0], 5]] + + def fun_grad(x, ignored): + return fun(x, ignored), grad(x, ignored) + + x0 = np.zeros(2) + + ref = root(fun, x0, args=(1,), method='krylov') + message = 'Method krylov does not use the jacobian' + with assert_warns(RuntimeWarning, match=message): + res1 = root(fun, x0, args=(1,), method='krylov', jac=grad) + with assert_warns(RuntimeWarning, match=message): + res2 = root(fun_grad, x0, args=(1,), method='krylov', jac=True) + + assert_equal(res1.x, ref.x) + assert_equal(res2.x, ref.x) + assert res1.success is res2.success is ref.success is True + + @pytest.mark.parametrize("method", ["hybr", "lm", "broyden1", "broyden2", + "anderson", "linearmixing", + "diagbroyden", "excitingmixing", + "krylov", "df-sane"]) + def test_method_in_result(self, method): + def func(x): + return x - 1 + + res = root(func, x0=[1], method=method) + assert res.method == method diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__shgo.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__shgo.py new file mode 100644 index 0000000000000000000000000000000000000000..fea1fb70fbdaf98bc081c8d6c9b8726b89e74043 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__shgo.py @@ -0,0 +1,1155 @@ +import logging +import sys + +import numpy as np +import time +from multiprocessing import Pool +from numpy.testing import assert_allclose, IS_PYPY +import pytest +from pytest import raises as assert_raises, warns +from scipy.optimize import (shgo, Bounds, minimize_scalar, minimize, rosen, + rosen_der, rosen_hess, NonlinearConstraint) +from scipy.optimize._constraints import new_constraint_to_old +from scipy.optimize._shgo import SHGO + + +class StructTestFunction: + def __init__(self, bounds, expected_x, expected_fun=None, + expected_xl=None, expected_funl=None): + self.bounds = bounds + self.expected_x = expected_x + self.expected_fun = expected_fun + self.expected_xl = expected_xl + self.expected_funl = expected_funl + + +def wrap_constraints(g): + cons = [] + if g is not None: + if not isinstance(g, (tuple, list)): + g = (g,) + else: + pass + for g in g: + cons.append({'type': 'ineq', + 'fun': g}) + cons = tuple(cons) + else: + cons = None + return cons + + +class StructTest1(StructTestFunction): + def f(self, x): + return x[0] ** 2 + x[1] ** 2 + + def g(x): + return -(np.sum(x, axis=0) - 6.0) + + cons = wrap_constraints(g) + + +test1_1 = StructTest1(bounds=[(-1, 6), (-1, 6)], + expected_x=[0, 0]) +test1_2 = StructTest1(bounds=[(0, 1), (0, 1)], + expected_x=[0, 0]) +test1_3 = StructTest1(bounds=[(None, None), (None, None)], + expected_x=[0, 0]) + + +class StructTest2(StructTestFunction): + """ + Scalar function with several minima to test all minimiser retrievals + """ + + def f(self, x): + return (x - 30) * np.sin(x) + + def g(x): + return 58 - np.sum(x, axis=0) + + cons = wrap_constraints(g) + + +test2_1 = StructTest2(bounds=[(0, 60)], + expected_x=[1.53567906], + expected_fun=-28.44677132, + # Important: test that funl return is in the correct + # order + expected_xl=np.array([[1.53567906], + [55.01782167], + [7.80894889], + [48.74797493], + [14.07445705], + [42.4913859], + [20.31743841], + [36.28607535], + [26.43039605], + [30.76371366]]), + + expected_funl=np.array([-28.44677132, 
-24.99785984, + -22.16855376, -18.72136195, + -15.89423937, -12.45154942, + -9.63133158, -6.20801301, + -3.43727232, -0.46353338]) + ) + +test2_2 = StructTest2(bounds=[(0, 4.5)], + expected_x=[1.53567906], + expected_fun=[-28.44677132], + expected_xl=np.array([[1.53567906]]), + expected_funl=np.array([-28.44677132]) + ) + + +class StructTest3(StructTestFunction): + """ + Hock and Schittkowski 18 problem (HS18). Hoch and Schittkowski (1981) + http://www.ai7.uni-bayreuth.de/test_problem_coll.pdf + Minimize: f = 0.01 * (x_1)**2 + (x_2)**2 + + Subject to: x_1 * x_2 - 25.0 >= 0, + (x_1)**2 + (x_2)**2 - 25.0 >= 0, + 2 <= x_1 <= 50, + 0 <= x_2 <= 50. + + Approx. Answer: + f([(250)**0.5 , (2.5)**0.5]) = 5.0 + + + """ + + # amended to test vectorisation of constraints + def f(self, x): + return 0.01 * (x[0]) ** 2 + (x[1]) ** 2 + + def g1(x): + return x[0] * x[1] - 25.0 + + def g2(x): + return x[0] ** 2 + x[1] ** 2 - 25.0 + + # g = (g1, g2) + # cons = wrap_constraints(g) + + def g(x): + return x[0] * x[1] - 25.0, x[0] ** 2 + x[1] ** 2 - 25.0 + + # this checks that shgo can be sent new-style constraints + __nlc = NonlinearConstraint(g, 0, np.inf) + cons = (__nlc,) + +test3_1 = StructTest3(bounds=[(2, 50), (0, 50)], + expected_x=[250 ** 0.5, 2.5 ** 0.5], + expected_fun=5.0 + ) + + +class StructTest4(StructTestFunction): + """ + Hock and Schittkowski 11 problem (HS11). Hoch and Schittkowski (1981) + + NOTE: Did not find in original reference to HS collection, refer to + Henderson (2015) problem 7 instead. 02.03.2016 + """ + + def f(self, x): + return ((x[0] - 10) ** 2 + 5 * (x[1] - 12) ** 2 + x[2] ** 4 + + 3 * (x[3] - 11) ** 2 + 10 * x[4] ** 6 + 7 * x[5] ** 2 + x[ + 6] ** 4 + - 4 * x[5] * x[6] - 10 * x[5] - 8 * x[6] + ) + + def g1(x): + return -(2 * x[0] ** 2 + 3 * x[1] ** 4 + x[2] + 4 * x[3] ** 2 + + 5 * x[4] - 127) + + def g2(x): + return -(7 * x[0] + 3 * x[1] + 10 * x[2] ** 2 + x[3] - x[4] - 282.0) + + def g3(x): + return -(23 * x[0] + x[1] ** 2 + 6 * x[5] ** 2 - 8 * x[6] - 196) + + def g4(x): + return -(4 * x[0] ** 2 + x[1] ** 2 - 3 * x[0] * x[1] + 2 * x[2] ** 2 + + 5 * x[5] - 11 * x[6]) + + g = (g1, g2, g3, g4) + + cons = wrap_constraints(g) + + +test4_1 = StructTest4(bounds=[(-10, 10), ] * 7, + expected_x=[2.330499, 1.951372, -0.4775414, + 4.365726, -0.6244870, 1.038131, 1.594227], + expected_fun=680.6300573 + ) + + +class StructTest5(StructTestFunction): + def f(self, x): + return ( + -(x[1] + 47.0)*np.sin(np.sqrt(abs(x[0]/2.0 + (x[1] + 47.0)))) + - x[0]*np.sin(np.sqrt(abs(x[0] - (x[1] + 47.0)))) + ) + + g = None + cons = wrap_constraints(g) + + +test5_1 = StructTest5(bounds=[(-512, 512), (-512, 512)], + expected_fun=[-959.64066272085051], + expected_x=[512., 404.23180542]) + + +class StructTestLJ(StructTestFunction): + """ + LennardJones objective function. Used to test symmetry constraints + settings. 
+ """ + + def f(self, x, *args): + print(f'x = {x}') + self.N = args[0] + k = int(self.N / 3) + s = 0.0 + + for i in range(k - 1): + for j in range(i + 1, k): + a = 3 * i + b = 3 * j + xd = x[a] - x[b] + yd = x[a + 1] - x[b + 1] + zd = x[a + 2] - x[b + 2] + ed = xd * xd + yd * yd + zd * zd + ud = ed * ed * ed + if ed > 0.0: + s += (1.0 / ud - 2.0) / ud + + return s + + g = None + cons = wrap_constraints(g) + + +N = 6 +boundsLJ = list(zip([-4.0] * 6, [4.0] * 6)) + +testLJ = StructTestLJ(bounds=boundsLJ, + expected_fun=[-1.0], + expected_x=None, + # expected_x=[-2.71247337e-08, + # -2.71247337e-08, + # -2.50000222e+00, + # -2.71247337e-08, + # -2.71247337e-08, + # -1.50000222e+00] + ) + + +class StructTestS(StructTestFunction): + def f(self, x): + return ((x[0] - 0.5) ** 2 + (x[1] - 0.5) ** 2 + + (x[2] - 0.5) ** 2 + (x[3] - 0.5) ** 2) + + g = None + cons = wrap_constraints(g) + + +test_s = StructTestS(bounds=[(0, 2.0), ] * 4, + expected_fun=0.0, + expected_x=np.ones(4) - 0.5 + ) + + +class StructTestTable(StructTestFunction): + def f(self, x): + if x[0] == 3.0 and x[1] == 3.0: + return 50 + else: + return 100 + + g = None + cons = wrap_constraints(g) + + +test_table = StructTestTable(bounds=[(-10, 10), (-10, 10)], + expected_fun=[50], + expected_x=[3.0, 3.0]) + + +class StructTestInfeasible(StructTestFunction): + """ + Test function with no feasible domain. + """ + + def f(self, x, *args): + return x[0] ** 2 + x[1] ** 2 + + def g1(x): + return x[0] + x[1] - 1 + + def g2(x): + return -(x[0] + x[1] - 1) + + def g3(x): + return -x[0] + x[1] - 1 + + def g4(x): + return -(-x[0] + x[1] - 1) + + g = (g1, g2, g3, g4) + cons = wrap_constraints(g) + + +test_infeasible = StructTestInfeasible(bounds=[(2, 50), (-1, 1)], + expected_fun=None, + expected_x=None + ) + + +@pytest.mark.skip("Not a test") +def run_test(test, args=(), test_atol=1e-5, n=100, iters=None, + callback=None, minimizer_kwargs=None, options=None, + sampling_method='sobol', workers=1): + res = shgo(test.f, test.bounds, args=args, constraints=test.cons, + n=n, iters=iters, callback=callback, + minimizer_kwargs=minimizer_kwargs, options=options, + sampling_method=sampling_method, workers=workers) + + print(f'res = {res}') + logging.info(f'res = {res}') + if test.expected_x is not None: + np.testing.assert_allclose(res.x, test.expected_x, + rtol=test_atol, + atol=test_atol) + + # (Optional tests) + if test.expected_fun is not None: + np.testing.assert_allclose(res.fun, + test.expected_fun, + atol=test_atol) + + if test.expected_xl is not None: + np.testing.assert_allclose(res.xl, + test.expected_xl, + atol=test_atol) + + if test.expected_funl is not None: + np.testing.assert_allclose(res.funl, + test.expected_funl, + atol=test_atol) + return + + +# Base test functions: +class TestShgoSobolTestFunctions: + """ + Global optimisation tests with Sobol sampling: + """ + + # Sobol algorithm + def test_f1_1_sobol(self): + """Multivariate test function 1: + x[0]**2 + x[1]**2 with bounds=[(-1, 6), (-1, 6)]""" + run_test(test1_1) + + def test_f1_2_sobol(self): + """Multivariate test function 1: + x[0]**2 + x[1]**2 with bounds=[(0, 1), (0, 1)]""" + run_test(test1_2) + + def test_f1_3_sobol(self): + """Multivariate test function 1: + x[0]**2 + x[1]**2 with bounds=[(None, None),(None, None)]""" + options = {'disp': True} + run_test(test1_3, options=options) + + def test_f2_1_sobol(self): + """Univariate test function on + f(x) = (x - 30) * sin(x) with bounds=[(0, 60)]""" + run_test(test2_1) + + def test_f2_2_sobol(self): + """Univariate test function on 
+ f(x) = (x - 30) * sin(x) bounds=[(0, 4.5)]""" + run_test(test2_2) + + def test_f3_sobol(self): + """NLP: Hock and Schittkowski problem 18""" + run_test(test3_1) + + @pytest.mark.slow + def test_f4_sobol(self): + """NLP: (High dimensional) Hock and Schittkowski 11 problem (HS11)""" + options = {'infty_constraints': False} + # run_test(test4_1, n=990, options=options) + run_test(test4_1, n=990 * 2, options=options) + + def test_f5_1_sobol(self): + """NLP: Eggholder, multimodal""" + # run_test(test5_1, n=30) + run_test(test5_1, n=60) + + def test_f5_2_sobol(self): + """NLP: Eggholder, multimodal""" + # run_test(test5_1, n=60, iters=5) + run_test(test5_1, n=60, iters=5) + + # def test_t911(self): + # """1D tabletop function""" + # run_test(test11_1) + + +class TestShgoSimplicialTestFunctions: + """ + Global optimisation tests with Simplicial sampling: + """ + + def test_f1_1_simplicial(self): + """Multivariate test function 1: + x[0]**2 + x[1]**2 with bounds=[(-1, 6), (-1, 6)]""" + run_test(test1_1, n=1, sampling_method='simplicial') + + def test_f1_2_simplicial(self): + """Multivariate test function 1: + x[0]**2 + x[1]**2 with bounds=[(0, 1), (0, 1)]""" + run_test(test1_2, n=1, sampling_method='simplicial') + + def test_f1_3_simplicial(self): + """Multivariate test function 1: x[0]**2 + x[1]**2 + with bounds=[(None, None),(None, None)]""" + run_test(test1_3, n=5, sampling_method='simplicial') + + def test_f2_1_simplicial(self): + """Univariate test function on + f(x) = (x - 30) * sin(x) with bounds=[(0, 60)]""" + options = {'minimize_every_iter': False} + run_test(test2_1, n=200, iters=7, options=options, + sampling_method='simplicial') + + def test_f2_2_simplicial(self): + """Univariate test function on + f(x) = (x - 30) * sin(x) bounds=[(0, 4.5)]""" + run_test(test2_2, n=1, sampling_method='simplicial') + + def test_f3_simplicial(self): + """NLP: Hock and Schittkowski problem 18""" + run_test(test3_1, n=1, sampling_method='simplicial') + + @pytest.mark.slow + def test_f4_simplicial(self): + """NLP: (High dimensional) Hock and Schittkowski 11 problem (HS11)""" + run_test(test4_1, n=1, sampling_method='simplicial') + + def test_lj_symmetry_old(self): + """LJ: Symmetry-constrained test function""" + options = {'symmetry': True, + 'disp': True} + args = (6,) # Number of atoms + run_test(testLJ, args=args, n=300, + options=options, iters=1, + sampling_method='simplicial') + + def test_f5_1_lj_symmetry(self): + """LJ: Symmetry constrained test function""" + options = {'symmetry': [0, ] * 6, + 'disp': True} + args = (6,) # No. 
of atoms
+
+        run_test(testLJ, args=args, n=300,
+                 options=options, iters=1,
+                 sampling_method='simplicial')
+
+    def test_f5_2_cons_symmetry(self):
+        """Symmetry-constrained test function"""
+        options = {'symmetry': [0, 0],
+                   'disp': True}
+
+        run_test(test1_1, n=200,
+                 options=options, iters=1,
+                 sampling_method='simplicial')
+
+    @pytest.mark.fail_slow(5)
+    def test_f5_3_cons_symmetry(self):
+        """Asymmetrically constrained test function"""
+        options = {'symmetry': [0, 0, 0, 3],
+                   'disp': True}
+
+        run_test(test_s, n=10000,
+                 options=options,
+                 iters=1,
+                 sampling_method='simplicial')
+
+    @pytest.mark.skip("Not a test")
+    def test_f0_min_variance(self):
+        """Return a minimum on a perfectly symmetric problem, based on
+        gh10429"""
+        avg = 0.5  # Given average value of x
+        cons = {'type': 'eq', 'fun': lambda x: np.mean(x) - avg}
+
+        # Minimize the variance of x under the given constraint
+        res = shgo(np.var, bounds=6 * [(0, 1)], constraints=cons)
+        assert res.success
+        assert_allclose(res.fun, 0, atol=1e-15)
+        assert_allclose(res.x, 0.5)
+
+    @pytest.mark.skip("Not a test")
+    def test_f0_min_variance_1D(self):
+        """Return a minimum on a perfectly symmetric 1D problem, based on
+        gh10538"""
+
+        def fun(x):
+            return x * (x - 1.0) * (x - 0.5)
+
+        bounds = [(0, 1)]
+        res = shgo(fun, bounds=bounds)
+        ref = minimize_scalar(fun, bounds=bounds[0])
+        assert res.success
+        assert_allclose(res.fun, ref.fun)
+        assert_allclose(res.x, ref.x, rtol=1e-6)
+
+# Argument test functions
+class TestShgoArguments:
+    def test_1_1_simpl_iter(self):
+        """Iterative simplicial sampling on TestFunction 1 (multivariate)"""
+        run_test(test1_2, n=None, iters=2, sampling_method='simplicial')
+
+    def test_1_2_simpl_iter(self):
+        """Iterative simplicial sampling on TestFunction 2 (univariate)"""
+        options = {'minimize_every_iter': False}
+        run_test(test2_1, n=None, iters=9, options=options,
+                 sampling_method='simplicial')
+
+    def test_2_1_sobol_iter(self):
+        """Iterative Sobol sampling on TestFunction 1 (multivariate)"""
+        run_test(test1_2, n=None, iters=1, sampling_method='sobol')
+
+    def test_2_2_sobol_iter(self):
+        """Iterative Sobol sampling on TestFunction 2 (univariate)"""
+        res = shgo(test2_1.f, test2_1.bounds, constraints=test2_1.cons,
+                   n=None, iters=1, sampling_method='sobol')
+
+        np.testing.assert_allclose(res.x, test2_1.expected_x, rtol=1e-5, atol=1e-5)
+        np.testing.assert_allclose(res.fun, test2_1.expected_fun, atol=1e-5)
+
+    def test_3_1_disp_simplicial(self):
+        """Iterative sampling on TestFunction 1 and 2 (multi- and univariate)
+        """
+
+        def callback_func(x):
+            print("Local minimization callback test")
+
+        for test in [test1_1, test2_1]:
+            shgo(test.f, test.bounds, iters=1,
+                 sampling_method='simplicial',
+                 callback=callback_func, options={'disp': True})
+            shgo(test.f, test.bounds, n=1, sampling_method='simplicial',
+                 callback=callback_func, options={'disp': True})
+
+    def test_3_2_disp_sobol(self):
+        """Iterative sampling on TestFunction 1 and 2 (multi- and univariate)"""
+
+        def callback_func(x):
+            print("Local minimization callback test")
+
+        for test in [test1_1, test2_1]:
+            shgo(test.f, test.bounds, iters=1, sampling_method='sobol',
+                 callback=callback_func, options={'disp': True})
+
+            shgo(test.f, test.bounds, n=1, sampling_method='simplicial',
+                 callback=callback_func, options={'disp': True})
+
+    def test_args_gh14589(self):
+        """Using `args` used to cause `shgo` to fail; see #14589, #15986,
+        #16506"""
+        res = shgo(func=lambda x, y, z: x * z + y, bounds=[(0, 3)], args=(1, 2)
+                   )
+        ref = shgo(func=lambda x: 2 * x + 1, bounds=[(0, 3)])
+        assert_allclose(res.fun, ref.fun)
+        assert_allclose(res.x, ref.x)
+
+    @pytest.mark.slow
+    def test_4_1_known_f_min(self):
+        """Test known function minima stopping criteria"""
+        # Specify known function value
+        options = {'f_min': test4_1.expected_fun,
+                   'f_tol': 1e-6,
+                   'minimize_every_iter': True}
+        # TODO: Make default n higher for faster tests
+        run_test(test4_1, n=None, test_atol=1e-5, options=options,
+                 sampling_method='simplicial')
+
+    @pytest.mark.slow
+    def test_4_2_known_f_min(self):
+        """Test Global mode limiting local evaluations"""
+        options = {  # Specify known function value
+            'f_min': test4_1.expected_fun,
+            'f_tol': 1e-6,
+            # Specify number of local iterations to perform
+            'minimize_every_iter': True,
+            'local_iter': 1}
+
+        run_test(test4_1, n=None, test_atol=1e-5, options=options,
+                 sampling_method='simplicial')
+
+    def test_4_4_known_f_min(self):
+        """Test Global mode limiting local evaluations for 1D funcs"""
+        options = {  # Specify known function value
+            'f_min': test2_1.expected_fun,
+            'f_tol': 1e-6,
+            # Specify number of local iterations to perform
+            'minimize_every_iter': True,
+            'local_iter': 1,
+            'infty_constraints': False}
+
+        res = shgo(test2_1.f, test2_1.bounds, constraints=test2_1.cons,
+                   n=None, iters=None, options=options,
+                   sampling_method='sobol')
+        np.testing.assert_allclose(res.x, test2_1.expected_x, rtol=1e-5, atol=1e-5)
+
+    def test_5_1_simplicial_argless(self):
+        """Test default simplicial sampling settings on TestFunction 1"""
+        res = shgo(test1_1.f, test1_1.bounds, constraints=test1_1.cons)
+        np.testing.assert_allclose(res.x, test1_1.expected_x, rtol=1e-5, atol=1e-5)
+
+    def test_5_2_sobol_argless(self):
+        """Test default Sobol sampling settings on TestFunction 1"""
+        res = shgo(test1_1.f, test1_1.bounds, constraints=test1_1.cons,
+                   sampling_method='sobol')
+        np.testing.assert_allclose(res.x, test1_1.expected_x, rtol=1e-5, atol=1e-5)
+
+    def test_6_1_simplicial_max_iter(self):
+        """Test that the maximum iteration option works on TestFunction 3"""
+        options = {'max_iter': 2}
+        res = shgo(test3_1.f, test3_1.bounds, constraints=test3_1.cons,
+                   options=options, sampling_method='simplicial')
+        np.testing.assert_allclose(res.x, test3_1.expected_x, rtol=1e-5, atol=1e-5)
+        np.testing.assert_allclose(res.fun, test3_1.expected_fun, atol=1e-5)
+
+    def test_6_2_simplicial_min_iter(self):
+        """Test that the minimum iteration option works on TestFunction 3"""
+        options = {'min_iter': 2}
+        res = shgo(test3_1.f, test3_1.bounds, constraints=test3_1.cons,
+                   options=options, sampling_method='simplicial')
+        np.testing.assert_allclose(res.x, test3_1.expected_x, rtol=1e-5, atol=1e-5)
+        np.testing.assert_allclose(res.fun, test3_1.expected_fun, atol=1e-5)
+
+    def test_7_1_minkwargs(self):
+        """Test the minimizer_kwargs arguments for solvers with constraints"""
+        # Test solvers
+        for solver in ['COBYLA', 'COBYQA', 'SLSQP']:
+            # Note that passing global constraints to SLSQP is tested in other
+            # unittests which run test4_1 normally
+            minimizer_kwargs = {'method': solver,
+                                'constraints': test3_1.cons}
+            run_test(test3_1, n=100, test_atol=1e-3,
+                     minimizer_kwargs=minimizer_kwargs,
+                     sampling_method='sobol')
+
+    def test_7_2_minkwargs(self):
+        """Test the minimizer_kwargs default inits"""
+        minimizer_kwargs = {'ftol': 1e-5}
+        options = {'disp': True}  # For coverage purposes
+        SHGO(test3_1.f, test3_1.bounds, constraints=test3_1.cons[0],
+             minimizer_kwargs=minimizer_kwargs, options=options)
+
+    def test_7_3_minkwargs(self):
+        """Test minimizer_kwargs arguments for
solvers without constraints""" + for solver in ['Nelder-Mead', 'Powell', 'CG', 'BFGS', 'Newton-CG', + 'L-BFGS-B', 'TNC', 'dogleg', 'trust-ncg', 'trust-exact', + 'trust-krylov']: + def jac(x): + return np.array([2 * x[0], 2 * x[1]]).T + + def hess(x): + return np.array([[2, 0], [0, 2]]) + + minimizer_kwargs = {'method': solver, + 'jac': jac, + 'hess': hess} + logging.info(f"Solver = {solver}") + logging.info("=" * 100) + run_test(test1_1, n=100, test_atol=1e-3, + minimizer_kwargs=minimizer_kwargs, + sampling_method='sobol') + + def test_8_homology_group_diff(self): + options = {'minhgrd': 1, + 'minimize_every_iter': True} + + run_test(test1_1, n=None, iters=None, options=options, + sampling_method='simplicial') + + def test_9_cons_g(self): + """Test single function constraint passing""" + SHGO(test3_1.f, test3_1.bounds, constraints=test3_1.cons[0]) + + @pytest.mark.xfail(IS_PYPY and sys.platform == 'win32', + reason="Failing and fix in PyPy not planned (see gh-18632)") + def test_10_finite_time(self): + """Test single function constraint passing""" + options = {'maxtime': 1e-15} + + def f(x): + time.sleep(1e-14) + return 0.0 + + res = shgo(f, test1_1.bounds, iters=5, options=options) + # Assert that only 1 rather than 5 requested iterations ran: + assert res.nit == 1 + + def test_11_f_min_0(self): + """Test to cover the case where f_lowest == 0""" + options = {'f_min': 0.0, + 'disp': True} + res = shgo(test1_2.f, test1_2.bounds, n=10, iters=None, + options=options, sampling_method='sobol') + np.testing.assert_equal(0, res.x[0]) + np.testing.assert_equal(0, res.x[1]) + + # @nottest + @pytest.mark.skip(reason="no way of currently testing this") + def test_12_sobol_inf_cons(self): + """Test to cover the case where f_lowest == 0""" + # TODO: This test doesn't cover anything new, it is unknown what the + # original test was intended for as it was never complete. Delete or + # replace in the future. + options = {'maxtime': 1e-15, + 'f_min': 0.0} + res = shgo(test1_2.f, test1_2.bounds, n=1, iters=None, + options=options, sampling_method='sobol') + np.testing.assert_equal(0.0, res.fun) + + def test_13_high_sobol(self): + """Test init of high-dimensional sobol sequences""" + + def f(x): + return 0 + + bounds = [(None, None), ] * 41 + SHGOc = SHGO(f, bounds, sampling_method='sobol') + # SHGOc.sobol_points(2, 50) + SHGOc.sampling_function(2, 50) + + def test_14_local_iter(self): + """Test limited local iterations for a pseudo-global mode""" + options = {'local_iter': 4} + run_test(test5_1, n=60, options=options) + + def test_15_min_every_iter(self): + """Test minimize every iter options and cover function cache""" + options = {'minimize_every_iter': True} + run_test(test1_1, n=1, iters=7, options=options, + sampling_method='sobol') + + def test_16_disp_bounds_minimizer(self, capsys): + """Test disp=True with minimizers that do not support bounds """ + options = {'disp': True} + minimizer_kwargs = {'method': 'nelder-mead'} + run_test(test1_2, sampling_method='simplicial', + options=options, minimizer_kwargs=minimizer_kwargs) + + def test_17_custom_sampling(self): + """Test the functionality to add custom sampling methods to shgo""" + + def sample(n, d): + return np.random.uniform(size=(n, d)) + + run_test(test1_1, n=30, sampling_method=sample) + + def test_18_bounds_class(self): + # test that new and old bounds yield same result + def f(x): + return np.square(x).sum() + + lb = [-6., 1., -5.] + ub = [-1., 3., 5.] 
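+        # Old-style bounds are per-variable (lb, ub) pairs built with zip;
+        # the Bounds instance carries the same limits, so both shgo runs
+        # below should behave identically.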
+ bounds_old = list(zip(lb, ub)) + bounds_new = Bounds(lb, ub) + + res_old_bounds = shgo(f, bounds_old) + res_new_bounds = shgo(f, bounds_new) + + assert res_new_bounds.nfev == res_old_bounds.nfev + assert res_new_bounds.message == res_old_bounds.message + assert res_new_bounds.success == res_old_bounds.success + x_opt = np.array([-1., 1., 0.]) + np.testing.assert_allclose(res_new_bounds.x, x_opt) + np.testing.assert_allclose(res_new_bounds.x, res_old_bounds.x) + + @pytest.mark.fail_slow(5) + def test_19_parallelization(self): + """Test the functionality to add custom sampling methods to shgo""" + + with Pool(2) as p: + run_test(test1_1, n=30, workers=p.map) # Constrained + run_test(test1_1, n=30, workers=map) # Constrained + with Pool(2) as p: + run_test(test_s, n=30, workers=p.map) # Unconstrained + run_test(test_s, n=30, workers=map) # Unconstrained + + def test_20_constrained_args(self): + """Test that constraints can be passed to arguments""" + + def eggholder(x): + return ( + -(x[1] + 47.0)*np.sin(np.sqrt(abs(x[0] / 2.0 + (x[1] + 47.0)))) + - x[0]*np.sin(np.sqrt(abs(x[0] - (x[1] + 47.0)))) + ) + + def f(x): # (cattle-feed) + return 24.55 * x[0] + 26.75 * x[1] + 39 * x[2] + 40.50 * x[3] + + bounds = [(0, 1.0), ] * 4 + + def g1_modified(x, i): + return i * 2.3 * x[0] + i * 5.6 * x[1] + 11.1 * x[2] + 1.3 * x[ + 3] - 5 # >=0 + + def g2(x): + return ( + 12*x[0] + 11.9*x[1] + 41.8*x[2] + 52.1*x[3] - 21 + - 1.645*np.sqrt( + 0.28*x[0]**2 + 0.19*x[1]**2 + 20.5*x[2]**2 + 0.62*x[3]**2 + ) + ) # >=0 + + def h1(x): + return x[0] + x[1] + x[2] + x[3] - 1 # == 0 + + cons = ({'type': 'ineq', 'fun': g1_modified, "args": (0,)}, + {'type': 'ineq', 'fun': g2}, + {'type': 'eq', 'fun': h1}) + + shgo(f, bounds, n=300, iters=1, constraints=cons) + # using constrain with arguments AND sampling method sobol + shgo(f, bounds, n=300, iters=1, constraints=cons, + sampling_method='sobol') + + def test_21_1_jac_true(self): + """Test that shgo can handle objective functions that return the + gradient alongside the objective value. Fixes gh-13547""" + # previous + def func(x): + return np.sum(np.power(x, 2)), 2 * x + + shgo( + func, + bounds=[[-1, 1], [1, 2]], + n=100, iters=5, + sampling_method="sobol", + minimizer_kwargs={'method': 'SLSQP', 'jac': True} + ) + + # new + def func(x): + return np.sum(x ** 2), 2 * x + + bounds = [[-1, 1], [1, 2], [-1, 1], [1, 2], [0, 3]] + + res = shgo(func, bounds=bounds, sampling_method="sobol", + minimizer_kwargs={'method': 'SLSQP', 'jac': True}) + ref = minimize(func, x0=[1, 1, 1, 1, 1], bounds=bounds, + jac=True) + assert res.success + assert_allclose(res.fun, ref.fun) + assert_allclose(res.x, ref.x, atol=1e-15) + + @pytest.mark.parametrize('derivative', ['jac', 'hess', 'hessp']) + def test_21_2_derivative_options(self, derivative): + """shgo used to raise an error when passing `options` with 'jac' + # see gh-12963. 
check that this is resolved + """ + + def objective(x): + return 3 * x[0] * x[0] + 2 * x[0] + 5 + + def gradient(x): + return 6 * x[0] + 2 + + def hess(x): + return 6 + + def hessp(x, p): + return 6 * p + + derivative_funcs = {'jac': gradient, 'hess': hess, 'hessp': hessp} + options = {derivative: derivative_funcs[derivative]} + minimizer_kwargs = {'method': 'trust-constr'} + + bounds = [(-100, 100)] + res = shgo(objective, bounds, minimizer_kwargs=minimizer_kwargs, + options=options) + ref = minimize(objective, x0=[0], bounds=bounds, **minimizer_kwargs, + **options) + + assert res.success + np.testing.assert_allclose(res.fun, ref.fun) + np.testing.assert_allclose(res.x, ref.x) + + def test_21_3_hess_options_rosen(self): + """Ensure the Hessian gets passed correctly to the local minimizer + routine. Previous report gh-14533. + """ + bounds = [(0, 1.6), (0, 1.6), (0, 1.4), (0, 1.4), (0, 1.4)] + options = {'jac': rosen_der, 'hess': rosen_hess} + minimizer_kwargs = {'method': 'Newton-CG'} + res = shgo(rosen, bounds, minimizer_kwargs=minimizer_kwargs, + options=options) + ref = minimize(rosen, np.zeros(5), method='Newton-CG', + **options) + assert res.success + assert_allclose(res.fun, ref.fun) + assert_allclose(res.x, ref.x, atol=1e-15) + + def test_21_arg_tuple_sobol(self): + """shgo used to raise an error when passing `args` with Sobol sampling + # see gh-12114. check that this is resolved""" + + def fun(x, k): + return x[0] ** k + + constraints = ({'type': 'ineq', 'fun': lambda x: x[0] - 1}) + + bounds = [(0, 10)] + res = shgo(fun, bounds, args=(1,), constraints=constraints, + sampling_method='sobol') + ref = minimize(fun, np.zeros(1), bounds=bounds, args=(1,), + constraints=constraints) + assert res.success + assert_allclose(res.fun, ref.fun) + assert_allclose(res.x, ref.x) + + +# Failure test functions +class TestShgoFailures: + def test_1_maxiter(self): + """Test failure on insufficient iterations""" + options = {'maxiter': 2} + res = shgo(test4_1.f, test4_1.bounds, n=2, iters=None, + options=options, sampling_method='sobol') + + np.testing.assert_equal(False, res.success) + # np.testing.assert_equal(4, res.nfev) + np.testing.assert_equal(4, res.tnev) + + def test_2_sampling(self): + """Rejection of unknown sampling method""" + assert_raises(ValueError, shgo, test1_1.f, test1_1.bounds, + sampling_method='not_Sobol') + + def test_3_1_no_min_pool_sobol(self): + """Check that the routine stops when no minimiser is found + after maximum specified function evaluations""" + options = {'maxfev': 10, + # 'maxev': 10, + 'disp': True} + res = shgo(test_table.f, test_table.bounds, n=3, options=options, + sampling_method='sobol') + np.testing.assert_equal(False, res.success) + # np.testing.assert_equal(9, res.nfev) + np.testing.assert_equal(12, res.nfev) + + def test_3_2_no_min_pool_simplicial(self): + """Check that the routine stops when no minimiser is found + after maximum specified sampling evaluations""" + options = {'maxev': 10, + 'disp': True} + res = shgo(test_table.f, test_table.bounds, n=3, options=options, + sampling_method='simplicial') + np.testing.assert_equal(False, res.success) + + def test_4_1_bound_err(self): + """Specified bounds ub > lb""" + bounds = [(6, 3), (3, 5)] + assert_raises(ValueError, shgo, test1_1.f, bounds) + + def test_4_2_bound_err(self): + """Specified bounds are of the form (lb, ub)""" + bounds = [(3, 5, 5), (3, 5)] + assert_raises(ValueError, shgo, test1_1.f, bounds) + + def test_5_1_1_infeasible_sobol(self): + """Ensures the algorithm terminates on infeasible 
problems + after maxev is exceeded. Use infty constraints option""" + options = {'maxev': 100, + 'disp': True} + + res = shgo(test_infeasible.f, test_infeasible.bounds, + constraints=test_infeasible.cons, n=100, options=options, + sampling_method='sobol') + + np.testing.assert_equal(False, res.success) + + def test_5_1_2_infeasible_sobol(self): + """Ensures the algorithm terminates on infeasible problems + after maxev is exceeded. Do not use infty constraints option""" + options = {'maxev': 100, + 'disp': True, + 'infty_constraints': False} + + res = shgo(test_infeasible.f, test_infeasible.bounds, + constraints=test_infeasible.cons, n=100, options=options, + sampling_method='sobol') + + np.testing.assert_equal(False, res.success) + + def test_5_2_infeasible_simplicial(self): + """Ensures the algorithm terminates on infeasible problems + after maxev is exceeded.""" + options = {'maxev': 1000, + 'disp': False} + + res = shgo(test_infeasible.f, test_infeasible.bounds, + constraints=test_infeasible.cons, n=100, options=options, + sampling_method='simplicial') + + np.testing.assert_equal(False, res.success) + + def test_6_1_lower_known_f_min(self): + """Test Global mode limiting local evaluations with f* too high""" + options = { # Specify known function value + 'f_min': test2_1.expected_fun + 2.0, + 'f_tol': 1e-6, + # Specify number of local iterations to perform+ + 'minimize_every_iter': True, + 'local_iter': 1, + 'infty_constraints': False} + args = (test2_1.f, test2_1.bounds) + kwargs = {'constraints': test2_1.cons, + 'n': None, + 'iters': None, + 'options': options, + 'sampling_method': 'sobol' + } + warns(UserWarning, shgo, *args, **kwargs) + + def test(self): + from scipy.optimize import rosen, shgo + bounds = [(0, 2), (0, 2), (0, 2), (0, 2), (0, 2)] + + def fun(x): + fun.nfev += 1 + return rosen(x) + + fun.nfev = 0 + + result = shgo(fun, bounds) + print(result.x, result.fun, fun.nfev) # 50 + + +# Returns +class TestShgoReturns: + def test_1_nfev_simplicial(self): + bounds = [(0, 2), (0, 2), (0, 2), (0, 2), (0, 2)] + + def fun(x): + fun.nfev += 1 + return rosen(x) + + fun.nfev = 0 + + result = shgo(fun, bounds) + np.testing.assert_equal(fun.nfev, result.nfev) + + def test_1_nfev_sobol(self): + bounds = [(0, 2), (0, 2), (0, 2), (0, 2), (0, 2)] + + def fun(x): + fun.nfev += 1 + return rosen(x) + + fun.nfev = 0 + + result = shgo(fun, bounds, sampling_method='sobol') + np.testing.assert_equal(fun.nfev, result.nfev) + + +def test_vector_constraint(): + # gh15514 + def quad(x): + x = np.asarray(x) + return [np.sum(x ** 2)] + + nlc = NonlinearConstraint(quad, [2.2], [3]) + oldc = new_constraint_to_old(nlc, np.array([1.0, 1.0])) + + res = shgo(rosen, [(0, 10), (0, 10)], constraints=oldc, sampling_method='sobol') + assert np.all(np.sum((res.x)**2) >= 2.2) + assert np.all(np.sum((res.x) ** 2) <= 3.0) + assert res.success + + +@pytest.mark.filterwarnings("ignore:delta_grad") +def test_trust_constr(): + def quad(x): + x = np.asarray(x) + return [np.sum(x ** 2)] + + nlc = NonlinearConstraint(quad, [2.6], [3]) + minimizer_kwargs = {'method': 'trust-constr'} + # note that we don't supply the constraints in minimizer_kwargs, + # so if the final result obeys the constraints we know that shgo + # passed them on to 'trust-constr' + res = shgo( + rosen, + [(0, 10), (0, 10)], + constraints=nlc, + sampling_method='sobol', + minimizer_kwargs=minimizer_kwargs + ) + assert np.all(np.sum((res.x)**2) >= 2.6) + assert np.all(np.sum((res.x) ** 2) <= 3.0) + assert res.success + + +def test_equality_constraints(): + # 
gh16260 + bounds = [(0.9, 4.0)] * 2 # Constrain probabilities to 0 and 1. + + def faulty(x): + return x[0] + x[1] + + nlc = NonlinearConstraint(faulty, 3.9, 3.9) + res = shgo(rosen, bounds=bounds, constraints=nlc) + assert_allclose(np.sum(res.x), 3.9) + + def faulty(x): + return x[0] + x[1] - 3.9 + + constraints = {'type': 'eq', 'fun': faulty} + res = shgo(rosen, bounds=bounds, constraints=constraints) + assert_allclose(np.sum(res.x), 3.9) + + bounds = [(0, 1.0)] * 4 + # sum of variable should equal 1. + def faulty(x): + return x[0] + x[1] + x[2] + x[3] - 1 + + # options = {'minimize_every_iter': True, 'local_iter':10} + constraints = {'type': 'eq', 'fun': faulty} + res = shgo( + lambda x: - np.prod(x), + bounds=bounds, + constraints=constraints, + sampling_method='sobol' + ) + assert_allclose(np.sum(res.x), 1.0) + +def test_gh16971(): + def cons(x): + return np.sum(x**2) - 0 + + c = {'fun': cons, 'type': 'ineq'} + minimizer_kwargs = { + 'method': 'COBYLA', + 'options': {'rhobeg': 5, 'tol': 5e-1, 'catol': 0.05} + } + + s = SHGO( + rosen, [(0, 10)]*2, constraints=c, minimizer_kwargs=minimizer_kwargs + ) + + assert s.minimizer_kwargs['method'].lower() == 'cobyla' + assert s.minimizer_kwargs['options']['catol'] == 0.05 diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__spectral.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__spectral.py new file mode 100644 index 0000000000000000000000000000000000000000..7b4dc52cc20caf0206fe53933d4dfc6d0fbb2c34 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test__spectral.py @@ -0,0 +1,226 @@ +import itertools + +import numpy as np +from numpy import exp +from numpy.testing import assert_, assert_equal + +from scipy.optimize import root + + +def test_performance(): + # Compare performance results to those listed in + # [Cheng & Li, IMA J. Num. An. 29, 814 (2008)] + # and + # [W. La Cruz, J.M. Martinez, M. Raydan, Math. Comp. 75, 1429 (2006)]. + # and those produced by dfsane.f from M. Raydan's website. + # + # Where the results disagree, the largest limits are taken. + + e_a = 1e-5 + e_r = 1e-4 + + table_1 = [ + dict(F=F_1, x0=x0_1, n=1000, nit=5, nfev=5), + dict(F=F_1, x0=x0_1, n=10000, nit=2, nfev=2), + dict(F=F_2, x0=x0_2, n=500, nit=11, nfev=11), + dict(F=F_2, x0=x0_2, n=2000, nit=11, nfev=11), + # dict(F=F_4, x0=x0_4, n=999, nit=243, nfev=1188) removed: + # too sensitive to rounding errors + # Results from dfsane.f; papers list nit=3, nfev=3 + dict(F=F_6, x0=x0_6, n=100, nit=6, nfev=6), + # Must have n%3==0, typo in papers? + dict(F=F_7, x0=x0_7, n=99, nit=23, nfev=29), + # Must have n%3==0, typo in papers? + dict(F=F_7, x0=x0_7, n=999, nit=23, nfev=29), + # Results from dfsane.f; papers list nit=nfev=6? 
+ dict(F=F_9, x0=x0_9, n=100, nit=12, nfev=18), + dict(F=F_9, x0=x0_9, n=1000, nit=12, nfev=18), + # Results from dfsane.f; papers list nit=2, nfev=12 + dict(F=F_10, x0=x0_10, n=1000, nit=5, nfev=5), + ] + + # Check also scaling invariance + for xscale, yscale, line_search in itertools.product( + [1.0, 1e-10, 1e10], [1.0, 1e-10, 1e10], ['cruz', 'cheng'] + ): + for problem in table_1: + n = problem['n'] + def func(x, n): + return yscale * problem['F'](x / xscale, n) + args = (n,) + x0 = problem['x0'](n) * xscale + + fatol = np.sqrt(n) * e_a * yscale + e_r * np.linalg.norm(func(x0, n)) + + sigma_eps = 1e-10 * min(yscale/xscale, xscale/yscale) + sigma_0 = xscale/yscale + + with np.errstate(over='ignore'): + sol = root(func, x0, args=args, + options=dict(ftol=0, fatol=fatol, maxfev=problem['nfev'] + 1, + sigma_0=sigma_0, sigma_eps=sigma_eps, + line_search=line_search), + method='DF-SANE') + + err_msg = repr( + [xscale, yscale, line_search, problem, np.linalg.norm(func(sol.x, n)), + fatol, sol.success, sol.nit, sol.nfev] + ) + assert sol.success, err_msg + # nfev+1: dfsane.f doesn't count first eval + assert sol.nfev <= problem['nfev'] + 1, err_msg + assert sol.nit <= problem['nit'], err_msg + assert np.linalg.norm(func(sol.x, n)) <= fatol, err_msg + + +def test_complex(): + def func(z): + return z**2 - 1 + 2j + x0 = 2.0j + + ftol = 1e-4 + sol = root(func, x0, tol=ftol, method='DF-SANE') + + assert_(sol.success) + + f0 = np.linalg.norm(func(x0)) + fx = np.linalg.norm(func(sol.x)) + assert_(fx <= ftol*f0) + + +def test_linear_definite(): + # The DF-SANE paper proves convergence for "strongly isolated" + # solutions. + # + # For linear systems F(x) = A x - b = 0, with A positive or + # negative definite, the solution is strongly isolated. + + def check_solvability(A, b, line_search='cruz'): + def func(x): + return A.dot(x) - b + xp = np.linalg.solve(A, b) + eps = np.linalg.norm(func(xp)) * 1e3 + sol = root( + func, b, + options=dict(fatol=eps, ftol=0, maxfev=17523, line_search=line_search), + method='DF-SANE', + ) + assert_(sol.success) + assert_(np.linalg.norm(func(sol.x)) <= eps) + + n = 90 + + # Test linear pos.def. system + np.random.seed(1234) + A = np.arange(n*n).reshape(n, n) + A = A + n*n * np.diag(1 + np.arange(n)) + assert_(np.linalg.eigvals(A).min() > 0) + b = np.arange(n) * 1.0 + check_solvability(A, b, 'cruz') + check_solvability(A, b, 'cheng') + + # Test linear neg.def. system + check_solvability(-A, b, 'cruz') + check_solvability(-A, b, 'cheng') + + +def test_shape(): + def f(x, arg): + return x - arg + + for dt in [float, complex]: + x = np.zeros([2,2]) + arg = np.ones([2,2], dtype=dt) + + sol = root(f, x, args=(arg,), method='DF-SANE') + assert_(sol.success) + assert_equal(sol.x.shape, x.shape) + + +# Some of the test functions and initial guesses listed in +# [W. La Cruz, M. Raydan. Optimization Methods and Software, 18, 583 (2003)] + +def F_1(x, n): + g = np.zeros([n]) + i = np.arange(2, n+1) + g[0] = exp(x[0] - 1) - 1 + g[1:] = i*(exp(x[1:] - 1) - x[1:]) + return g + +def x0_1(n): + x0 = np.empty([n]) + x0.fill(n/(n-1)) + return x0 + +def F_2(x, n): + g = np.zeros([n]) + i = np.arange(2, n+1) + g[0] = exp(x[0]) - 1 + g[1:] = 0.1*i*(exp(x[1:]) + x[:-1] - 1) + return g + +def x0_2(n): + x0 = np.empty([n]) + x0.fill(1/n**2) + return x0 + + +def F_4(x, n): # skip name check + assert_equal(n % 3, 0) + g = np.zeros([n]) + # Note: the first line is typoed in some of the references; + # correct in original [Gasparo, Optimization Meth. 
13, 79 (2000)] + g[::3] = 0.6 * x[::3] + 1.6 * x[1::3]**3 - 7.2 * x[1::3]**2 + 9.6 * x[1::3] - 4.8 + g[1::3] = (0.48 * x[::3] - 0.72 * x[1::3]**3 + 3.24 * x[1::3]**2 - 4.32 * x[1::3] + - x[2::3] + 0.2 * x[2::3]**3 + 2.16) + g[2::3] = 1.25 * x[2::3] - 0.25*x[2::3]**3 + return g + + +def x0_4(n): # skip name check + assert_equal(n % 3, 0) + x0 = np.array([-1, 1/2, -1] * (n//3)) + return x0 + +def F_6(x, n): + c = 0.9 + mu = (np.arange(1, n+1) - 0.5)/n + return x - 1/(1 - c/(2*n) * (mu[:,None]*x / (mu[:,None] + mu)).sum(axis=1)) + +def x0_6(n): + return np.ones([n]) + +def F_7(x, n): + assert_equal(n % 3, 0) + + def phi(t): + v = 0.5*t - 2 + v[t > -1] = ((-592*t**3 + 888*t**2 + 4551*t - 1924)/1998)[t > -1] + v[t >= 2] = (0.5*t + 2)[t >= 2] + return v + g = np.zeros([n]) + g[::3] = 1e4 * x[1::3]**2 - 1 + g[1::3] = exp(-x[::3]) + exp(-x[1::3]) - 1.0001 + g[2::3] = phi(x[2::3]) + return g + +def x0_7(n): + assert_equal(n % 3, 0) + return np.array([1e-3, 18, 1] * (n//3)) + +def F_9(x, n): + g = np.zeros([n]) + i = np.arange(2, n) + g[0] = x[0]**3/3 + x[1]**2/2 + g[1:-1] = -x[1:-1]**2/2 + i*x[1:-1]**3/3 + x[2:]**2/2 + g[-1] = -x[-1]**2/2 + n*x[-1]**3/3 + return g + +def x0_9(n): + return np.ones([n]) + +def F_10(x, n): + return np.log(1 + x) - x/n + +def x0_10(n): + return np.ones([n]) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_bracket.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_bracket.py new file mode 100644 index 0000000000000000000000000000000000000000..dc39b5fe52862a757d509e3c639016f90129e6d4 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_bracket.py @@ -0,0 +1,793 @@ +import pytest + +import numpy as np +from numpy.testing import assert_array_less, assert_allclose, assert_equal + +from scipy.optimize._bracket import _bracket_root, _bracket_minimum, _ELIMITS +import scipy._lib._elementwise_iterative_method as eim +from scipy import stats + +class TestBracketRoot: + @pytest.mark.parametrize("seed", (615655101, 3141866013, 238075752)) + @pytest.mark.parametrize("use_xmin", (False, True)) + @pytest.mark.parametrize("other_side", (False, True)) + @pytest.mark.parametrize("fix_one_side", (False, True)) + def test_nfev_expected(self, seed, use_xmin, other_side, fix_one_side): + # Property-based test to confirm that _bracket_root is behaving as + # expected. The basic case is when root < a < b. + # The number of times bracket expands (per side) can be found by + # setting the expression for the left endpoint of the bracket to the + # root of f (x=0), solving for i, and rounding up. The corresponding + # lower and upper ends of the bracket are found by plugging this back + # into the expression for the ends of the bracket. 
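+        # Concretely, without xmin the left endpoint after i expansions is
+        # xl_i = xr0 - d*factor**i; it first passes the root at x=0 once
+        # d*factor**i >= xr0, i.e. n = ceil(log(xr0/d) / log(factor)),
+        # which is exactly what the code below computes.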
+ # `other_side=True` is the case that a < b < root + # Special cases like a < root < b are tested separately + + rng = np.random.default_rng(seed) + xl0, d, factor = rng.random(size=3) * [1e5, 10, 5] + factor = 1 + factor # factor must be greater than 1 + xr0 = xl0 + d # xr0 must be greater than a in basic case + + def f(x): + f.count += 1 + return x # root is 0 + + if use_xmin: + xmin = -rng.random() + n = np.ceil(np.log(-(xl0 - xmin) / xmin) / np.log(factor)) + l, u = xmin + (xl0 - xmin)*factor**-n, xmin + (xl0 - xmin)*factor**-(n - 1) + kwargs = dict(xl0=xl0, xr0=xr0, factor=factor, xmin=xmin) + else: + n = np.ceil(np.log(xr0/d) / np.log(factor)) + l, u = xr0 - d*factor**n, xr0 - d*factor**(n-1) + kwargs = dict(xl0=xl0, xr0=xr0, factor=factor) + + if other_side: + kwargs['xl0'], kwargs['xr0'] = -kwargs['xr0'], -kwargs['xl0'] + l, u = -u, -l + if 'xmin' in kwargs: + kwargs['xmax'] = -kwargs.pop('xmin') + + if fix_one_side: + if other_side: + kwargs['xmin'] = -xr0 + else: + kwargs['xmax'] = xr0 + + f.count = 0 + res = _bracket_root(f, **kwargs) + + # Compare reported number of function evaluations `nfev` against + # reported `nit`, actual function call count `f.count`, and theoretical + # number of expansions `n`. + # When both sides are free, these get multiplied by 2 because function + # is evaluated on the left and the right each iteration. + # When one side is fixed, however, we add one: on the right side, the + # function gets evaluated once at b. + # Add 1 to `n` and `res.nit` because function evaluations occur at + # iterations *0*, 1, ..., `n`. Subtract 1 from `f.count` because + # function is called separately for left and right in iteration 0. + if not fix_one_side: + assert res.nfev == 2*(res.nit+1) == 2*(f.count-1) == 2*(n + 1) + else: + assert res.nfev == (res.nit+1)+1 == (f.count-1)+1 == (n+1)+1 + + # Compare reported bracket to theoretical bracket and reported function + # values to function evaluated at bracket. + bracket = np.asarray([res.xl, res.xr]) + assert_allclose(bracket, (l, u)) + f_bracket = np.asarray([res.fl, res.fr]) + assert_allclose(f_bracket, f(bracket)) + + # Check that bracket is valid and that status and success are correct + assert res.xr > res.xl + signs = np.sign(f_bracket) + assert signs[0] == -signs[1] + assert res.status == 0 + assert res.success + + def f(self, q, p): + return stats.norm.cdf(q) - p + + @pytest.mark.parametrize('p', [0.6, np.linspace(0.05, 0.95, 10)]) + @pytest.mark.parametrize('xmin', [-5, None]) + @pytest.mark.parametrize('xmax', [5, None]) + @pytest.mark.parametrize('factor', [1.2, 2]) + def test_basic(self, p, xmin, xmax, factor): + # Test basic functionality to bracket root (distribution PPF) + res = _bracket_root(self.f, -0.01, 0.01, xmin=xmin, xmax=xmax, + factor=factor, args=(p,)) + assert_equal(-np.sign(res.fl), np.sign(res.fr)) + + @pytest.mark.parametrize('shape', [tuple(), (12,), (3, 4), (3, 2, 2)]) + def test_vectorization(self, shape): + # Test for correct functionality, output shapes, and dtypes for various + # input shapes. 
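+        # The np.vectorize wrapper below applies the scalar routine
+        # element-by-element, producing a per-element reference against which
+        # the single vectorized `_bracket_root` call is compared.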
+ p = np.linspace(-0.05, 1.05, 12).reshape(shape) if shape else 0.6 + args = (p,) + maxiter = 10 + + @np.vectorize + def bracket_root_single(xl0, xr0, xmin, xmax, factor, p): + return _bracket_root(self.f, xl0, xr0, xmin=xmin, xmax=xmax, + factor=factor, args=(p,), + maxiter=maxiter) + + def f(*args, **kwargs): + f.f_evals += 1 + return self.f(*args, **kwargs) + f.f_evals = 0 + + rng = np.random.default_rng(2348234) + xl0 = -rng.random(size=shape) + xr0 = rng.random(size=shape) + xmin, xmax = 1e3*xl0, 1e3*xr0 + if shape: # make some elements un + i = rng.random(size=shape) > 0.5 + xmin[i], xmax[i] = -np.inf, np.inf + factor = rng.random(size=shape) + 1.5 + res = _bracket_root(f, xl0, xr0, xmin=xmin, xmax=xmax, factor=factor, + args=args, maxiter=maxiter) + refs = bracket_root_single(xl0, xr0, xmin, xmax, factor, p).ravel() + + attrs = ['xl', 'xr', 'fl', 'fr', 'success', 'nfev', 'nit'] + for attr in attrs: + ref_attr = [getattr(ref, attr) for ref in refs] + res_attr = getattr(res, attr) + assert_allclose(res_attr.ravel(), ref_attr) + assert_equal(res_attr.shape, shape) + + assert np.issubdtype(res.success.dtype, np.bool_) + if shape: + assert np.all(res.success[1:-1]) + assert np.issubdtype(res.status.dtype, np.integer) + assert np.issubdtype(res.nfev.dtype, np.integer) + assert np.issubdtype(res.nit.dtype, np.integer) + assert_equal(np.max(res.nit), f.f_evals - 2) + assert_array_less(res.xl, res.xr) + assert_allclose(res.fl, self.f(res.xl, *args)) + assert_allclose(res.fr, self.f(res.xr, *args)) + + def test_flags(self): + # Test cases that should produce different status flags; show that all + # can be produced simultaneously. + def f(xs, js): + funcs = [lambda x: x - 1.5, + lambda x: x - 1000, + lambda x: x - 1000, + lambda x: np.nan, + lambda x: x] + + return [funcs[j](x) for x, j in zip(xs, js)] + + args = (np.arange(5, dtype=np.int64),) + res = _bracket_root(f, + xl0=[-1, -1, -1, -1, 4], + xr0=[1, 1, 1, 1, -4], + xmin=[-np.inf, -1, -np.inf, -np.inf, 6], + xmax=[np.inf, 1, np.inf, np.inf, 2], + args=args, maxiter=3) + + ref_flags = np.array([eim._ECONVERGED, + _ELIMITS, + eim._ECONVERR, + eim._EVALUEERR, + eim._EINPUTERR]) + + assert_equal(res.status, ref_flags) + + @pytest.mark.parametrize("root", (0.622, [0.622, 0.623])) + @pytest.mark.parametrize('xmin', [-5, None]) + @pytest.mark.parametrize('xmax', [5, None]) + @pytest.mark.parametrize("dtype", (np.float16, np.float32, np.float64)) + def test_dtype(self, root, xmin, xmax, dtype): + # Test that dtypes are preserved + + xmin = xmin if xmin is None else dtype(xmin) + xmax = xmax if xmax is None else dtype(xmax) + root = dtype(root) + def f(x, root): + return ((x - root) ** 3).astype(dtype) + + bracket = np.asarray([-0.01, 0.01], dtype=dtype) + res = _bracket_root(f, *bracket, xmin=xmin, xmax=xmax, args=(root,)) + assert np.all(res.success) + assert res.xl.dtype == res.xr.dtype == dtype + assert res.fl.dtype == res.fr.dtype == dtype + + def test_input_validation(self): + # Test input validation for appropriate error messages + + message = '`func` must be callable.' + with pytest.raises(ValueError, match=message): + _bracket_root(None, -4, 4) + + message = '...must be numeric and real.' 
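+        # Each call below feeds a complex or otherwise non-numeric value
+        # through a different argument (bracket endpoint, xmin, xmax, factor)
+        # and should trigger the same validation error.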
+ with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4+1j, 4) + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 'hello') + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, xmin=np) + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, xmax=object()) + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, factor=sum) + + message = "All elements of `factor` must be greater than 1." + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, factor=0.5) + + message = "shape mismatch: objects cannot be broadcast" + # raised by `np.broadcast, but the traceback is readable IMO + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, [-2, -3], [3, 4, 5]) + # Consider making this give a more readable error message + # with pytest.raises(ValueError, match=message): + # _bracket_root(lambda x: [x[0], x[1], x[1]], [-3, -3], [5, 5]) + + message = '`maxiter` must be a non-negative integer.' + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, maxiter=1.5) + with pytest.raises(ValueError, match=message): + _bracket_root(lambda x: x, -4, 4, maxiter=-1) + + def test_special_cases(self): + # Test edge cases and other special cases + + # Test that integers are not passed to `f` + # (otherwise this would overflow) + def f(x): + assert np.issubdtype(x.dtype, np.floating) + return x ** 99 - 1 + + res = _bracket_root(f, -7, 5) + assert res.success + + # Test maxiter = 0. Should do nothing to bracket. + def f(x): + return x - 10 + + bracket = (-3, 5) + res = _bracket_root(f, *bracket, maxiter=0) + assert res.xl, res.xr == bracket + assert res.nit == 0 + assert res.nfev == 2 + assert res.status == -2 + + # Test scalar `args` (not in tuple) + def f(x, c): + return c*x - 1 + + res = _bracket_root(f, -1, 1, args=3) + assert res.success + assert_allclose(res.fl, f(res.xl, 3)) + + # Test other edge cases + + def f(x): + f.count += 1 + return x + + # 1. root lies within guess of bracket + f.count = 0 + _bracket_root(f, -10, 20) + assert_equal(f.count, 2) + + # 2. bracket endpoint hits root exactly + f.count = 0 + res = _bracket_root(f, 5, 10, factor=2) + bracket = (res.xl, res.xr) + assert_equal(res.nfev, 4) + assert_allclose(bracket, (0, 5), atol=1e-15) + + # 3. bracket limit hits root exactly + with np.errstate(over='ignore'): + res = _bracket_root(f, 5, 10, xmin=0) + bracket = (res.xl, res.xr) + assert_allclose(bracket[0], 0, atol=1e-15) + with np.errstate(over='ignore'): + res = _bracket_root(f, -10, -5, xmax=0) + bracket = (res.xl, res.xr) + assert_allclose(bracket[1], 0, atol=1e-15) + + # 4. 
bracket not within min, max + with np.errstate(over='ignore'): + res = _bracket_root(f, 5, 10, xmin=1) + assert not res.success + + +class TestBracketMinimum: + def init_f(self): + def f(x, a, b): + f.count += 1 + return (x - a)**2 + b + f.count = 0 + return f + + def assert_valid_bracket(self, result): + assert np.all( + (result.xl < result.xm) & (result.xm < result.xr) + ) + assert np.all( + (result.fl >= result.fm) & (result.fr > result.fm) + | (result.fl > result.fm) & (result.fr > result.fm) + ) + + def get_kwargs( + self, *, xl0=None, xr0=None, factor=None, xmin=None, xmax=None, args=() + ): + names = ("xl0", "xr0", "xmin", "xmax", "factor", "args") + return { + name: val for name, val in zip(names, (xl0, xr0, xmin, xmax, factor, args)) + if isinstance(val, np.ndarray) or np.isscalar(val) + or val not in [None, ()] + } + + @pytest.mark.parametrize( + "seed", + ( + 307448016549685229886351382450158984917, + 11650702770735516532954347931959000479, + 113767103358505514764278732330028568336, + ) + ) + @pytest.mark.parametrize("use_xmin", (False, True)) + @pytest.mark.parametrize("other_side", (False, True)) + def test_nfev_expected(self, seed, use_xmin, other_side): + rng = np.random.default_rng(seed) + args = (0, 0) # f(x) = x^2 with minimum at 0 + # xl0, xm0, xr0 are chosen such that the initial bracket is to + # the right of the minimum, and the bracket will expand + # downhill towards zero. + xl0, d1, d2, factor = rng.random(size=4) * [1e5, 10, 10, 5] + xm0 = xl0 + d1 + xr0 = xm0 + d2 + # Factor should be greater than one. + factor += 1 + + if use_xmin: + xmin = -rng.random() * 5 + n = int(np.ceil(np.log(-(xl0 - xmin) / xmin) / np.log(factor))) + lower = xmin + (xl0 - xmin)*factor**-n + middle = xmin + (xl0 - xmin)*factor**-(n-1) + upper = xmin + (xl0 - xmin)*factor**-(n-2) if n > 1 else xm0 + # It may be the case the lower is below the minimum, but we still + # don't have a valid bracket. + if middle**2 > lower**2: + n += 1 + lower, middle, upper = ( + xmin + (xl0 - xmin)*factor**-n, lower, middle + ) + else: + xmin = None + n = int(np.ceil(np.log(xl0 / d1) / np.log(factor))) + lower = xl0 - d1*factor**n + middle = xl0 - d1*factor**(n-1) if n > 1 else xl0 + upper = xl0 - d1*factor**(n-2) if n > 1 else xm0 + # It may be the case the lower is below the minimum, but we still + # don't have a valid bracket. + if middle**2 > lower**2: + n += 1 + lower, middle, upper = ( + xl0 - d1*factor**n, lower, middle + ) + f = self.init_f() + + xmax = None + if other_side: + xl0, xm0, xr0 = -xr0, -xm0, -xl0 + xmin, xmax = None, -xmin if xmin is not None else None + lower, middle, upper = -upper, -middle, -lower + + kwargs = self.get_kwargs( + xl0=xl0, xr0=xr0, xmin=xmin, xmax=xmax, factor=factor, args=args + ) + result = _bracket_minimum(f, xm0, **kwargs) + + # Check that `nfev` and `nit` have the correct relationship + assert result.nfev == result.nit + 3 + # Check that `nfev` reports the correct number of function evaluations. + assert result.nfev == f.count + # Check that the number of iterations matches the theoretical value. + assert result.nit == n + + # Compare reported bracket to theoretical bracket and reported function + # values to function evaluated at bracket. 
+ bracket = np.asarray([result.xl, result.xm, result.xr]) + assert_allclose(bracket, (lower, middle, upper)) + f_bracket = np.asarray([result.fl, result.fm, result.fr]) + assert_allclose(f_bracket, f(bracket, *args)) + + self.assert_valid_bracket(result) + assert result.status == 0 + assert result.success + + def test_flags(self): + # Test cases that should produce different status flags; show that all + # can be produced simultaneously + def f(xs, js): + funcs = [lambda x: (x - 1.5)**2, + lambda x: x, + lambda x: x, + lambda x: np.nan, + lambda x: x**2] + + return [funcs[j](x) for x, j in zip(xs, js)] + + args = (np.arange(5, dtype=np.int64),) + xl0 = [-1.0, -1.0, -1.0, -1.0, 6.0] + xm0 = [0.0, 0.0, 0.0, 0.0, 4.0] + xr0 = [1.0, 1.0, 1.0, 1.0, 2.0] + xmin=[-np.inf, -1.0, -np.inf, -np.inf, 8.0] + + result = _bracket_minimum(f, xm0, xl0=xl0, xr0=xr0, xmin=xmin, + args=args, maxiter=3) + + reference_flags = np.array([eim._ECONVERGED, _ELIMITS, + eim._ECONVERR, eim._EVALUEERR, + eim._EINPUTERR]) + assert_equal(result.status, reference_flags) + + @pytest.mark.parametrize("minimum", (0.622, [0.622, 0.623])) + @pytest.mark.parametrize("dtype", (np.float16, np.float32, np.float64)) + @pytest.mark.parametrize("xmin", [-5, None]) + @pytest.mark.parametrize("xmax", [5, None]) + def test_dtypes(self, minimum, xmin, xmax, dtype): + xmin = xmin if xmin is None else dtype(xmin) + xmax = xmax if xmax is None else dtype(xmax) + minimum = dtype(minimum) + + def f(x, minimum): + return ((x - minimum)**2).astype(dtype) + + xl0, xm0, xr0 = np.array([-0.01, 0.0, 0.01], dtype=dtype) + result = _bracket_minimum( + f, xm0, xl0=xl0, xr0=xr0, xmin=xmin, xmax=xmax, args=(minimum, ) + ) + assert np.all(result.success) + assert result.xl.dtype == result.xm.dtype == result.xr.dtype == dtype + assert result.fl.dtype == result.fm.dtype == result.fr.dtype == dtype + + def test_input_validation(self): + # Test input validation for appropriate error messages + + message = '`func` must be callable.' + with pytest.raises(ValueError, match=message): + _bracket_minimum(None, -4, xl0=4) + + message = '...must be numeric and real.' + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, 4+1j) + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, xl0='hello') + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, xmin=np) + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, xmax=object()) + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, factor=sum) + + message = "All elements of `factor` must be greater than 1." + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x, -4, factor=0.5) + + message = "shape mismatch: objects cannot be broadcast" + # raised by `np.broadcast, but the traceback is readable IMO + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, [-2, -3], xl0=[-3, -4, -5]) + + message = '`maxiter` must be a non-negative integer.' + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, xr0=4, maxiter=1.5) + with pytest.raises(ValueError, match=message): + _bracket_minimum(lambda x: x**2, -4, xr0=4, maxiter=-1) + + @pytest.mark.parametrize("xl0", [0.0, None]) + @pytest.mark.parametrize("xm0", (0.05, 0.1, 0.15)) + @pytest.mark.parametrize("xr0", (0.2, 0.4, 0.6, None)) + # Minimum is ``a`` for each tuple ``(a, b)`` below. 
Tests cases where minimum + # is within, or at varying disances to the left or right of the initial + # bracket. + @pytest.mark.parametrize( + "args", + ( + (1.2, 0), (-0.5, 0), (0.1, 0), (0.2, 0), (3.6, 0), (21.4, 0), + (121.6, 0), (5764.1, 0), (-6.4, 0), (-12.9, 0), (-146.2, 0) + ) + ) + def test_scalar_no_limits(self, xl0, xm0, xr0, args): + f = self.init_f() + kwargs = self.get_kwargs(xl0=xl0, xr0=xr0, args=args) + result = _bracket_minimum(f, xm0, **kwargs) + self.assert_valid_bracket(result) + assert result.status == 0 + assert result.success + assert result.nfev == f.count + + @pytest.mark.parametrize( + # xmin is set at 0.0 in all cases. + "xl0,xm0,xr0,xmin", + ( + # Initial bracket at varying distances from the xmin. + (0.5, 0.75, 1.0, 0.0), + (1.0, 2.5, 4.0, 0.0), + (2.0, 4.0, 6.0, 0.0), + (12.0, 16.0, 20.0, 0.0), + # Test default initial left endpoint selection. It should not + # be below xmin. + (None, 0.75, 1.0, 0.0), + (None, 2.5, 4.0, 0.0), + (None, 4.0, 6.0, 0.0), + (None, 16.0, 20.0, 0.0), + ) + ) + @pytest.mark.parametrize( + "args", ( + (0.0, 0.0), # Minimum is directly at xmin. + (1e-300, 0.0), # Minimum is extremely close to xmin. + (1e-20, 0.0), # Minimum is very close to xmin. + # Minimum at varying distances from xmin. + (0.1, 0.0), + (0.2, 0.0), + (0.4, 0.0) + ) + ) + def test_scalar_with_limit_left(self, xl0, xm0, xr0, xmin, args): + f = self.init_f() + kwargs = self.get_kwargs(xl0=xl0, xr0=xr0, xmin=xmin, args=args) + result = _bracket_minimum(f, xm0, **kwargs) + self.assert_valid_bracket(result) + assert result.status == 0 + assert result.success + assert result.nfev == f.count + + @pytest.mark.parametrize( + #xmax is set to 1.0 in all cases. + "xl0,xm0,xr0,xmax", + ( + # Bracket at varying distances from xmax. + (0.2, 0.3, 0.4, 1.0), + (0.05, 0.075, 0.1, 1.0), + (-0.2, -0.1, 0.0, 1.0), + (-21.2, -17.7, -14.2, 1.0), + # Test default right endpoint selection. It should not exceed xmax. + (0.2, 0.3, None, 1.0), + (0.05, 0.075, None, 1.0), + (-0.2, -0.1, None, 1.0), + (-21.2, -17.7, None, 1.0), + ) + ) + @pytest.mark.parametrize( + "args", ( + (0.9999999999999999, 0.0), # Minimum very close to xmax. + # Minimum at varying distances from xmax. + (0.9, 0.0), + (0.7, 0.0), + (0.5, 0.0) + ) + ) + def test_scalar_with_limit_right(self, xl0, xm0, xr0, xmax, args): + f = self.init_f() + kwargs = self.get_kwargs(xl0=xl0, xr0=xr0, xmax=xmax, args=args) + result = _bracket_minimum(f, xm0, **kwargs) + self.assert_valid_bracket(result) + assert result.status == 0 + assert result.success + assert result.nfev == f.count + + @pytest.mark.parametrize( + "xl0,xm0,xr0,xmin,xmax,args", + ( + ( # Case 1: + # Initial bracket. + 0.2, + 0.3, + 0.4, + # Function slopes down to the right from the bracket to a minimum + # at 1.0. xmax is also at 1.0 + None, + 1.0, + (1.0, 0.0) + ), + ( # Case 2: + # Initial bracket. + 1.4, + 1.95, + 2.5, + # Function slopes down to the left from the bracket to a minimum at + # 0.3 with xmin set to 0.3. + 0.3, + None, + (0.3, 0.0) + ), + ( + # Case 3: + # Initial bracket. + 2.6, + 3.25, + 3.9, + # Function slopes down and to the right to a minimum at 99.4 with xmax + # at 99.4. Tests case where minimum is at xmax relatively further from + # the bracket. + None, + 99.4, + (99.4, 0) + ), + ( + # Case 4: + # Initial bracket. + 4, + 4.5, + 5, + # Function slopes down and to the left away from the bracket with a + # minimum at -26.3 with xmin set to -26.3. Tests case where minimum is + # at xmin relatively far from the bracket. 
+ -26.3, + None, + (-26.3, 0) + ), + ( + # Case 5: + # Similar to Case 1 above, but tests default values of xl0 and xr0. + None, + 0.3, + None, + None, + 1.0, + (1.0, 0.0) + ), + ( # Case 6: + # Similar to Case 2 above, but tests default values of xl0 and xr0. + None, + 1.95, + None, + 0.3, + None, + (0.3, 0.0) + ), + ( + # Case 7: + # Similar to Case 3 above, but tests default values of xl0 and xr0. + None, + 3.25, + None, + None, + 99.4, + (99.4, 0) + ), + ( + # Case 8: + # Similar to Case 4 above, but tests default values of xl0 and xr0. + None, + 4.5, + None, + -26.3, + None, + (-26.3, 0) + ), + ) + ) + def test_minimum_at_boundary_point(self, xl0, xm0, xr0, xmin, xmax, args): + f = self.init_f() + kwargs = self.get_kwargs(xr0=xr0, xmin=xmin, xmax=xmax, args=args) + result = _bracket_minimum(f, xm0, **kwargs) + assert result.status == -1 + assert args[0] in (result.xl, result.xr) + assert result.nfev == f.count + + @pytest.mark.parametrize('shape', [tuple(), (12, ), (3, 4), (3, 2, 2)]) + def test_vectorization(self, shape): + # Test for correct functionality, output shapes, and dtypes for + # various input shapes. + a = np.linspace(-0.05, 1.05, 12).reshape(shape) if shape else 0.6 + args = (a, 0.0) + maxiter = 10 + + @np.vectorize + def bracket_minimum_single(xm0, xl0, xr0, xmin, xmax, factor, a): + return _bracket_minimum(self.init_f(), xm0, xl0=xl0, xr0=xr0, xmin=xmin, + xmax=xmax, factor=factor, maxiter=maxiter, + args=(a, 0.0)) + + f = self.init_f() + + rng = np.random.default_rng(2348234) + xl0 = -rng.random(size=shape) + xr0 = rng.random(size=shape) + xm0 = xl0 + rng.random(size=shape) * (xr0 - xl0) + xmin, xmax = 1e3*xl0, 1e3*xr0 + if shape: # make some elements un + i = rng.random(size=shape) > 0.5 + xmin[i], xmax[i] = -np.inf, np.inf + factor = rng.random(size=shape) + 1.5 + res = _bracket_minimum(f, xm0, xl0=xl0, xr0=xr0, xmin=xmin, xmax=xmax, + factor=factor, args=args, maxiter=maxiter) + refs = bracket_minimum_single(xm0, xl0, xr0, xmin, xmax, factor, a).ravel() + + attrs = ['xl', 'xm', 'xr', 'fl', 'fm', 'fr', 'success', 'nfev', 'nit'] + for attr in attrs: + ref_attr = [getattr(ref, attr) for ref in refs] + res_attr = getattr(res, attr) + assert_allclose(res_attr.ravel(), ref_attr) + assert_equal(res_attr.shape, shape) + + assert np.issubdtype(res.success.dtype, np.bool_) + if shape: + assert np.all(res.success[1:-1]) + assert np.issubdtype(res.status.dtype, np.integer) + assert np.issubdtype(res.nfev.dtype, np.integer) + assert np.issubdtype(res.nit.dtype, np.integer) + assert_equal(np.max(res.nit), f.count - 3) + self.assert_valid_bracket(res) + assert_allclose(res.fl, f(res.xl, *args)) + assert_allclose(res.fm, f(res.xm, *args)) + assert_allclose(res.fr, f(res.xr, *args)) + + def test_special_cases(self): + # Test edge cases and other special cases. + + # Test that integers are not passed to `f` + # (otherwise this would overflow) + def f(x): + assert np.issubdtype(x.dtype, np.floating) + return x ** 98 - 1 + + result = _bracket_minimum(f, -7, xr0=5) + assert result.success + + # Test maxiter = 0. Should do nothing to bracket. + def f(x): + return x**2 - 10 + + xl0, xm0, xr0 = -3, -1, 2 + result = _bracket_minimum(f, xm0, xl0=xl0, xr0=xr0, maxiter=0) + assert_equal([result.xl, result.xm, result.xr], [xl0, xm0, xr0]) + + # Test scalar `args` (not in tuple) + def f(x, c): + return c*x**2 - 1 + + result = _bracket_minimum(f, -1, args=3) + assert result.success + assert_allclose(result.fl, f(result.xl, 3)) + + # Initial bracket is valid. 
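+        # With a valid initial bracket no expansion iterations are needed, so
+        # only the three initial evaluations occur and the bracket is returned
+        # unchanged (checked via f.count and the equality assertions below).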
+ f = self.init_f() + xl0, xm0, xr0 = [-1.0, -0.2, 1.0] + args = (0, 0) + result = _bracket_minimum(f, xm0, xl0=xl0, xr0=xr0, args=args) + assert f.count == 3 + + assert_equal( + [result.xl, result.xm, result.xr], + [xl0, xm0, xr0], + ) + assert_equal( + [result.fl, result.fm, result.fr], + [f(xl0, *args), f(xm0, *args), f(xr0, *args)], + ) + + def test_gh_20562_left(self): + # Regression test for https://github.com/scipy/scipy/issues/20562 + # minimum of f in [xmin, xmax] is at xmin. + xmin, xmax = 0.21933608, 1.39713606 + + def f(x): + log_a, log_b = np.log([xmin, xmax]) + return -((log_b - log_a)*x)**-1 + + result = _bracket_minimum(f, 0.5535723499480897, xmin=xmin, xmax=xmax) + assert xmin == result.xl + + def test_gh_20562_right(self): + # Regression test for https://github.com/scipy/scipy/issues/20562 + # minimum of f in [xmin, xmax] is at xmax. + xmin, xmax = -1.39713606, -0.21933608, + + def f(x): + log_a, log_b = np.log([-xmax, -xmin]) + return ((log_b - log_a)*x)**-1 + + result = _bracket_minimum(f, -0.5535723499480897, xmin=xmin, xmax=xmax) + assert xmax == result.xr diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_chandrupatla.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_chandrupatla.py new file mode 100644 index 0000000000000000000000000000000000000000..1300c08784b5056d5bde9798bd57d2e1dd75f635 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_chandrupatla.py @@ -0,0 +1,906 @@ +import pytest +import numpy as np +from numpy.testing import assert_allclose, assert_equal, assert_array_less + +from scipy import stats, special +import scipy._lib._elementwise_iterative_method as eim +from scipy.conftest import array_api_compatible +from scipy._lib._array_api import (array_namespace, xp_assert_close, xp_assert_equal, + xp_assert_less, xp_minimum, is_numpy, is_cupy) + +from scipy.optimize._chandrupatla import (_chandrupatla_minimize, + _chandrupatla as _chandrupatla_root) +from scipy.optimize._tstutils import _CHANDRUPATLA_TESTS + +from itertools import permutations +from .test_zeros import TestScalarRootFinders + +def f1(x): + return 100*(1 - x**3.)**2 + (1-x**2.) + 2*(1-x)**2. + + +def f2(x): + return 5 + (x - 2.)**6 + + +def f3(x): + return np.exp(x) - 5*x + + +def f4(x): + return x**5. - 5*x**3. - 20.*x + 5. 
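+
+# A minimal usage sketch (illustrative assumption, not part of the upstream
+# suite): `_chandrupatla_minimize` takes a three-point bracket xl < xm < xr,
+# typically with f(xm) below both endpoint values, and returns a result
+# object exposing `x`, `fun`, `nfev`, `nit` and `success`.  For f2 above,
+# f2(x) = 5 + (x - 2)**6:
+#
+#     res = _chandrupatla_minimize(f2, 0.0, 1.0, 10.0)
+#     # expected: res.success is True and res.x is approximately 2.0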
+ + +def f5(x): + return 8*x**3 - 2*x**2 - 7*x + 3 + + +def _bracket_minimum(func, x1, x2): + phi = 1.61803398875 + maxiter = 100 + f1 = func(x1) + f2 = func(x2) + step = x2 - x1 + x1, x2, f1, f2, step = ((x2, x1, f2, f1, -step) if f2 > f1 + else (x1, x2, f1, f2, step)) + + for i in range(maxiter): + step *= phi + x3 = x2 + step + f3 = func(x3) + if f3 < f2: + x1, x2, f1, f2 = x2, x3, f2, f3 + else: + break + return x1, x2, x3, f1, f2, f3 + + +cases = [ + (f1, -1, 11), + (f1, -2, 13), + (f1, -4, 13), + (f1, -8, 15), + (f1, -16, 16), + (f1, -32, 19), + (f1, -64, 20), + (f1, -128, 21), + (f1, -256, 21), + (f1, -512, 19), + (f1, -1024, 24), + (f2, -1, 8), + (f2, -2, 6), + (f2, -4, 6), + (f2, -8, 7), + (f2, -16, 8), + (f2, -32, 8), + (f2, -64, 9), + (f2, -128, 11), + (f2, -256, 13), + (f2, -512, 12), + (f2, -1024, 13), + (f3, -1, 11), + (f3, -2, 11), + (f3, -4, 11), + (f3, -8, 10), + (f3, -16, 14), + (f3, -32, 12), + (f3, -64, 15), + (f3, -128, 18), + (f3, -256, 18), + (f3, -512, 19), + (f3, -1024, 19), + (f4, -0.05, 9), + (f4, -0.10, 11), + (f4, -0.15, 11), + (f4, -0.20, 11), + (f4, -0.25, 11), + (f4, -0.30, 9), + (f4, -0.35, 9), + (f4, -0.40, 9), + (f4, -0.45, 10), + (f4, -0.50, 10), + (f4, -0.55, 10), + (f5, -0.05, 6), + (f5, -0.10, 7), + (f5, -0.15, 8), + (f5, -0.20, 10), + (f5, -0.25, 9), + (f5, -0.30, 8), + (f5, -0.35, 7), + (f5, -0.40, 7), + (f5, -0.45, 9), + (f5, -0.50, 9), + (f5, -0.55, 8) +] + + +class TestChandrupatlaMinimize: + + def f(self, x, loc): + dist = stats.norm() + return -dist.pdf(x - loc) + + @pytest.mark.parametrize('loc', [0.6, np.linspace(-1.05, 1.05, 10)]) + def test_basic(self, loc): + # Find mode of normal distribution. Compare mode against location + # parameter and value of pdf at mode against expected pdf. + res = _chandrupatla_minimize(self.f, -5, 0, 5, args=(loc,)) + ref = loc + np.testing.assert_allclose(res.x, ref, rtol=1e-6) + np.testing.assert_allclose(res.fun, -stats.norm.pdf(0), atol=0, rtol=0) + assert res.x.shape == np.shape(ref) + + @pytest.mark.parametrize('shape', [tuple(), (12,), (3, 4), (3, 2, 2)]) + def test_vectorization(self, shape): + # Test for correct functionality, output shapes, and dtypes for various + # input shapes. 
+ loc = np.linspace(-0.05, 1.05, 12).reshape(shape) if shape else 0.6 + args = (loc,) + + @np.vectorize + def chandrupatla_single(loc_single): + return _chandrupatla_minimize(self.f, -5, 0, 5, args=(loc_single,)) + + def f(*args, **kwargs): + f.f_evals += 1 + return self.f(*args, **kwargs) + f.f_evals = 0 + + res = _chandrupatla_minimize(f, -5, 0, 5, args=args) + refs = chandrupatla_single(loc).ravel() + + ref_x = [ref.x for ref in refs] + assert_allclose(res.x.ravel(), ref_x) + assert_equal(res.x.shape, shape) + + ref_fun = [ref.fun for ref in refs] + assert_allclose(res.fun.ravel(), ref_fun) + assert_equal(res.fun.shape, shape) + assert_equal(res.fun, self.f(res.x, *args)) + + ref_success = [ref.success for ref in refs] + assert_equal(res.success.ravel(), ref_success) + assert_equal(res.success.shape, shape) + assert np.issubdtype(res.success.dtype, np.bool_) + + ref_flag = [ref.status for ref in refs] + assert_equal(res.status.ravel(), ref_flag) + assert_equal(res.status.shape, shape) + assert np.issubdtype(res.status.dtype, np.integer) + + ref_nfev = [ref.nfev for ref in refs] + assert_equal(res.nfev.ravel(), ref_nfev) + assert_equal(np.max(res.nfev), f.f_evals) + assert_equal(res.nfev.shape, res.fun.shape) + assert np.issubdtype(res.nfev.dtype, np.integer) + + ref_nit = [ref.nit for ref in refs] + assert_equal(res.nit.ravel(), ref_nit) + assert_equal(np.max(res.nit), f.f_evals-3) + assert_equal(res.nit.shape, res.fun.shape) + assert np.issubdtype(res.nit.dtype, np.integer) + + ref_xl = [ref.xl for ref in refs] + assert_allclose(res.xl.ravel(), ref_xl) + assert_equal(res.xl.shape, shape) + + ref_xm = [ref.xm for ref in refs] + assert_allclose(res.xm.ravel(), ref_xm) + assert_equal(res.xm.shape, shape) + + ref_xr = [ref.xr for ref in refs] + assert_allclose(res.xr.ravel(), ref_xr) + assert_equal(res.xr.shape, shape) + + ref_fl = [ref.fl for ref in refs] + assert_allclose(res.fl.ravel(), ref_fl) + assert_equal(res.fl.shape, shape) + assert_allclose(res.fl, self.f(res.xl, *args)) + + ref_fm = [ref.fm for ref in refs] + assert_allclose(res.fm.ravel(), ref_fm) + assert_equal(res.fm.shape, shape) + assert_allclose(res.fm, self.f(res.xm, *args)) + + ref_fr = [ref.fr for ref in refs] + assert_allclose(res.fr.ravel(), ref_fr) + assert_equal(res.fr.shape, shape) + assert_allclose(res.fr, self.f(res.xr, *args)) + + def test_flags(self): + # Test cases that should produce different status flags; show that all + # can be produced simultaneously. 
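+        # The four elements exercise, in order: convergence, a bracket that
+        # cannot contain a minimum (monotonic function), failure to converge
+        # within `maxiter`, and a NaN function value.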
+ def f(xs, js): + funcs = [lambda x: (x - 2.5) ** 2, + lambda x: x - 10, + lambda x: (x - 2.5) ** 4, + lambda x: np.nan] + + return [funcs[j](x) for x, j in zip(xs, js)] + + args = (np.arange(4, dtype=np.int64),) + + res = _chandrupatla_minimize(f, [0]*4, [2]*4, [np.pi]*4, args=args, + maxiter=10) + + ref_flags = np.array([eim._ECONVERGED, + eim._ESIGNERR, + eim._ECONVERR, + eim._EVALUEERR]) + assert_equal(res.status, ref_flags) + + def test_convergence(self): + # Test that the convergence tolerances behave as expected + rng = np.random.default_rng(2585255913088665241) + p = rng.random(size=3) + bracket = (-5, 0, 5) + args = (p,) + kwargs0 = dict(args=args, xatol=0, xrtol=0, fatol=0, frtol=0) + + kwargs = kwargs0.copy() + kwargs['xatol'] = 1e-3 + res1 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + j1 = abs(res1.xr - res1.xl) + assert_array_less(j1, 4*kwargs['xatol']) + kwargs['xatol'] = 1e-6 + res2 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + j2 = abs(res2.xr - res2.xl) + assert_array_less(j2, 4*kwargs['xatol']) + assert_array_less(j2, j1) + + kwargs = kwargs0.copy() + kwargs['xrtol'] = 1e-3 + res1 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + j1 = abs(res1.xr - res1.xl) + assert_array_less(j1, 4*kwargs['xrtol']*abs(res1.x)) + kwargs['xrtol'] = 1e-6 + res2 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + j2 = abs(res2.xr - res2.xl) + assert_array_less(j2, 4*kwargs['xrtol']*abs(res2.x)) + assert_array_less(j2, j1) + + kwargs = kwargs0.copy() + kwargs['fatol'] = 1e-3 + res1 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + h1 = abs(res1.fl - 2 * res1.fm + res1.fr) + assert_array_less(h1, 2*kwargs['fatol']) + kwargs['fatol'] = 1e-6 + res2 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + h2 = abs(res2.fl - 2 * res2.fm + res2.fr) + assert_array_less(h2, 2*kwargs['fatol']) + assert_array_less(h2, h1) + + kwargs = kwargs0.copy() + kwargs['frtol'] = 1e-3 + res1 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + h1 = abs(res1.fl - 2 * res1.fm + res1.fr) + assert_array_less(h1, 2*kwargs['frtol']*abs(res1.fun)) + kwargs['frtol'] = 1e-6 + res2 = _chandrupatla_minimize(self.f, *bracket, **kwargs) + h2 = abs(res2.fl - 2 * res2.fm + res2.fr) + assert_array_less(h2, 2*kwargs['frtol']*abs(res2.fun)) + assert_array_less(h2, h1) + + def test_maxiter_callback(self): + # Test behavior of `maxiter` parameter and `callback` interface + loc = 0.612814 + bracket = (-5, 0, 5) + maxiter = 5 + + res = _chandrupatla_minimize(self.f, *bracket, args=(loc,), + maxiter=maxiter) + assert not np.any(res.success) + assert np.all(res.nfev == maxiter+3) + assert np.all(res.nit == maxiter) + + def callback(res): + callback.iter += 1 + callback.res = res + assert hasattr(res, 'x') + if callback.iter == 0: + # callback is called once with initial bracket + assert (res.xl, res.xm, res.xr) == bracket + else: + changed_xr = (res.xl == callback.xl) & (res.xr != callback.xr) + changed_xl = (res.xl != callback.xl) & (res.xr == callback.xr) + assert np.all(changed_xr | changed_xl) + + callback.xl = res.xl + callback.xr = res.xr + assert res.status == eim._EINPROGRESS + assert_equal(self.f(res.xl, loc), res.fl) + assert_equal(self.f(res.xm, loc), res.fm) + assert_equal(self.f(res.xr, loc), res.fr) + assert_equal(self.f(res.x, loc), res.fun) + if callback.iter == maxiter: + raise StopIteration + + callback.xl = np.nan + callback.xr = np.nan + callback.iter = -1 # callback called once before first iteration + callback.res = None + + res2 = _chandrupatla_minimize(self.f, *bracket, 
args=(loc,),
+                                     callback=callback)
+
+        # terminating with callback is identical to terminating due to maxiter
+        # (except for `status`)
+        for key in res.keys():
+            if key == 'status':
+                assert res[key] == eim._ECONVERR
+                assert callback.res[key] == eim._EINPROGRESS
+                assert res2[key] == eim._ECALLBACK
+            else:
+                assert res2[key] == callback.res[key] == res[key]
+
+    @pytest.mark.parametrize('case', cases)
+    def test_nit_expected(self, case):
+        # Test that `_chandrupatla` implements Chandrupatla's algorithm:
+        # in all 55 test cases, the number of iterations performed
+        # matches the number reported in the original paper.
+        func, x1, nit = case
+
+        # Find bracket using the algorithm in the paper
+        step = 0.2
+        x2 = x1 + step
+        x1, x2, x3, f1, f2, f3 = _bracket_minimum(func, x1, x2)
+
+        # Use tolerances from original paper
+        xatol = 0.0001
+        fatol = 0.000001
+        xrtol = 1e-16
+        frtol = 1e-16
+
+        res = _chandrupatla_minimize(func, x1, x2, x3, xatol=xatol,
+                                     fatol=fatol, xrtol=xrtol, frtol=frtol)
+        assert_equal(res.nit, nit)
+
+    @pytest.mark.parametrize("loc", (0.65, [0.65, 0.7]))
+    @pytest.mark.parametrize("dtype", (np.float16, np.float32, np.float64))
+    def test_dtype(self, loc, dtype):
+        # Test that dtypes are preserved
+
+        loc = dtype(loc)
+
+        def f(x, loc):
+            assert x.dtype == dtype
+            return ((x - loc) ** 2).astype(dtype)
+
+        res = _chandrupatla_minimize(f, dtype(-3), dtype(1), dtype(5),
+                                     args=(loc,))
+        assert res.x.dtype == dtype
+        assert_allclose(res.x, loc, rtol=np.sqrt(np.finfo(dtype).eps))
+
+    def test_input_validation(self):
+        # Test input validation for appropriate error messages
+
+        message = '`func` must be callable.'
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(None, -4, 0, 4)
+
+        message = 'Abscissae and function output must be real numbers.'
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4+1j, 0, 4)
+
+        message = "shape mismatch: objects cannot be broadcast"
+        # raised by `np.broadcast`, but the traceback is readable IMO
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, [-2, -3], [0, 0], [3, 4, 5])
+
+        message = "The shape of the array returned by `func` must be the same"
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: [x[0], x[1], x[1]], [-3, -3],
+                                   [0, 0], [5, 5])
+
+        message = 'Tolerances must be non-negative scalars.'
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, xatol=-1)
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, xrtol=np.nan)
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, fatol='ekki')
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, frtol=np.nan)
+
+        message = '`maxiter` must be a non-negative integer.'
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, maxiter=1.5)
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, maxiter=-1)
+
+        message = '`callback` must be callable.'
+        with pytest.raises(ValueError, match=message):
+            _chandrupatla_minimize(lambda x: x, -4, 0, 4, callback='shrubbery')
+
+    def test_bracket_order(self):
+        # Confirm that order of points in bracket doesn't matter
+        loc = np.linspace(-1, 1, 6)[:, np.newaxis]
+        brackets = np.array(list(permutations([-5, 0, 5]))).T
+        res = _chandrupatla_minimize(self.f, *brackets, args=(loc,))
+        assert np.all(np.isclose(res.x, loc) | (res.fun == self.f(loc, loc)))
+        ref = res.x[:, 0]  # all columns should be the same
+        assert_allclose(*np.broadcast_arrays(res.x.T, ref), rtol=1e-15)
+
+    def test_special_cases(self):
+        # Test edge cases and other special cases
+
+        # Test that integers are not passed to `f`
+        # (otherwise this would overflow)
+        def f(x):
+            assert np.issubdtype(x.dtype, np.floating)
+            return (x-1) ** 100
+
+        with np.errstate(invalid='ignore'):
+            res = _chandrupatla_minimize(f, -7, 0, 8, fatol=0, frtol=0)
+        assert res.success
+        assert_allclose(res.x, 1, rtol=1e-3)
+        assert_equal(res.fun, 0)
+
+        # Test that if all elements of bracket equal minimizer, algorithm
+        # reports convergence
+        def f(x):
+            return (x-1)**2
+
+        res = _chandrupatla_minimize(f, 1, 1, 1)
+        assert res.success
+        assert_equal(res.x, 1)
+
+        # Test maxiter = 0. Should do nothing to bracket.
+        def f(x):
+            return (x-1)**2
+
+        bracket = (-3, 1.1, 5)
+        res = _chandrupatla_minimize(f, *bracket, maxiter=0)
+        assert (res.xl, res.xm, res.xr) == bracket
+        assert res.nit == 0
+        assert res.nfev == 3
+        assert res.status == -2
+        assert res.x == 1.1  # best so far
+
+        # Test scalar `args` (not in tuple)
+        def f(x, c):
+            return (x-c)**2 - 1
+
+        res = _chandrupatla_minimize(f, -1, 0, 1, args=1/3)
+        assert_allclose(res.x, 1/3)
+
+        # Test zero tolerances
+        # TODO: fatol/frtol = 0?
+        def f(x):
+            return -np.sin(x)
+
+        res = _chandrupatla_minimize(f, 0, 1, np.pi, xatol=0, xrtol=0,
+                                     fatol=0, frtol=0)
+        assert res.success
+        # found a minimum exactly (according to floating point arithmetic)
+        assert res.xl < res.xm < res.xr
+        assert f(res.xl) == f(res.xm) == f(res.xr)
+
+
+@array_api_compatible
+@pytest.mark.usefixtures("skip_xp_backends")
+@pytest.mark.skip_xp_backends('array_api_strict', 'jax.numpy',
+                              reasons=['Currently uses fancy indexing assignment.',
+                                       'JAX arrays do not support item assignment.'])
+class TestChandrupatla(TestScalarRootFinders):
+
+    def f(self, q, p):
+        return special.ndtr(q) - p
+
+    @pytest.mark.parametrize('p', [0.6, np.linspace(-0.05, 1.05, 10)])
+    def test_basic(self, p, xp):
+        # Invert distribution CDF and compare against distribution `ppf`
+        a, b = xp.asarray(-5.), xp.asarray(5.)
+        res = _chandrupatla_root(self.f, a, b, args=(xp.asarray(p),))
+        ref = xp.asarray(stats.norm().ppf(p), dtype=xp.asarray(p).dtype)
+        xp_assert_close(res.x, ref)
+
+    @pytest.mark.parametrize('shape', [tuple(), (12,), (3, 4), (3, 2, 2)])
+    def test_vectorization(self, shape, xp):
+        # Test for correct functionality, output shapes, and dtypes for various
+        # input shapes.
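+        # References come from per-element NumPy solves; they are reshaped and
+        # cast to the backend's dtype before comparison.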
+ p = (np.linspace(-0.05, 1.05, 12).reshape(shape) if shape + else np.float64(0.6)) + p_xp = xp.asarray(p) + args_xp = (p_xp,) + dtype = p_xp.dtype + xp_test = array_namespace(p_xp) # need xp.bool + + @np.vectorize + def chandrupatla_single(p): + return _chandrupatla_root(self.f, -5, 5, args=(p,)) + + def f(*args, **kwargs): + f.f_evals += 1 + return self.f(*args, **kwargs) + f.f_evals = 0 + + res = _chandrupatla_root(f, xp.asarray(-5.), xp.asarray(5.), args=args_xp) + refs = chandrupatla_single(p).ravel() + + ref_x = [ref.x for ref in refs] + ref_x = xp.reshape(xp.asarray(ref_x, dtype=dtype), shape) + xp_assert_close(res.x, ref_x) + + ref_fun = [ref.fun for ref in refs] + ref_fun = xp.reshape(xp.asarray(ref_fun, dtype=dtype), shape) + xp_assert_close(res.fun, ref_fun, atol=1e-15) + xp_assert_equal(res.fun, self.f(res.x, *args_xp)) + + ref_success = [bool(ref.success) for ref in refs] + ref_success = xp.reshape(xp.asarray(ref_success, dtype=xp_test.bool), shape) + xp_assert_equal(res.success, ref_success) + + ref_flag = [ref.status for ref in refs] + ref_flag = xp.reshape(xp.asarray(ref_flag, dtype=xp.int32), shape) + xp_assert_equal(res.status, ref_flag) + + ref_nfev = [ref.nfev for ref in refs] + ref_nfev = xp.reshape(xp.asarray(ref_nfev, dtype=xp.int32), shape) + if is_numpy(xp): + xp_assert_equal(res.nfev, ref_nfev) + assert xp.max(res.nfev) == f.f_evals + else: # different backend may lead to different nfev + assert res.nfev.shape == shape + assert res.nfev.dtype == xp.int32 + + ref_nit = [ref.nit for ref in refs] + ref_nit = xp.reshape(xp.asarray(ref_nit, dtype=xp.int32), shape) + if is_numpy(xp): + xp_assert_equal(res.nit, ref_nit) + assert xp.max(res.nit) == f.f_evals-2 + else: + assert res.nit.shape == shape + assert res.nit.dtype == xp.int32 + + ref_xl = [ref.xl for ref in refs] + ref_xl = xp.reshape(xp.asarray(ref_xl, dtype=dtype), shape) + xp_assert_close(res.xl, ref_xl) + + ref_xr = [ref.xr for ref in refs] + ref_xr = xp.reshape(xp.asarray(ref_xr, dtype=dtype), shape) + xp_assert_close(res.xr, ref_xr) + + xp_assert_less(res.xl, res.xr) + finite = xp.isfinite(res.x) + assert xp.all((res.x[finite] == res.xl[finite]) + | (res.x[finite] == res.xr[finite])) + + # PyTorch and CuPy don't solve to the same accuracy as NumPy - that's OK. + atol = 1e-15 if is_numpy(xp) else 1e-9 + + ref_fl = [ref.fl for ref in refs] + ref_fl = xp.reshape(xp.asarray(ref_fl, dtype=dtype), shape) + xp_assert_close(res.fl, ref_fl, atol=atol) + xp_assert_equal(res.fl, self.f(res.xl, *args_xp)) + + ref_fr = [ref.fr for ref in refs] + ref_fr = xp.reshape(xp.asarray(ref_fr, dtype=dtype), shape) + xp_assert_close(res.fr, ref_fr, atol=atol) + xp_assert_equal(res.fr, self.f(res.xr, *args_xp)) + + assert xp.all(xp.abs(res.fun[finite]) == + xp_minimum(xp.abs(res.fl[finite]), + xp.abs(res.fr[finite]))) + + def test_flags(self, xp): + # Test cases that should produce different status flags; show that all + # can be produced simultaneously. + def f(xs, js): + # Note that full_like and int(j) shouldn't really be required. CuPy + # is just really picky here, so I'm making it a special case to + # make sure the other backends work when the user is less careful. 
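+            # (int(j) converts the 0-d CuPy index array to a Python int.)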
+ assert js.dtype == xp.int64 + if is_cupy(xp): + funcs = [lambda x: x - 2.5, + lambda x: x - 10, + lambda x: (x - 0.1)**3, + lambda x: xp.full_like(x, xp.nan)] + return [funcs[int(j)](x) for x, j in zip(xs, js)] + + funcs = [lambda x: x - 2.5, + lambda x: x - 10, + lambda x: (x - 0.1) ** 3, + lambda x: xp.nan] + return [funcs[j](x) for x, j in zip(xs, js)] + + args = (xp.arange(4, dtype=xp.int64),) + a, b = xp.asarray([0.]*4), xp.asarray([xp.pi]*4) + res = _chandrupatla_root(f, a, b, args=args, maxiter=2) + + ref_flags = xp.asarray([eim._ECONVERGED, + eim._ESIGNERR, + eim._ECONVERR, + eim._EVALUEERR], dtype=xp.int32) + xp_assert_equal(res.status, ref_flags) + + def test_convergence(self, xp): + # Test that the convergence tolerances behave as expected + rng = np.random.default_rng(2585255913088665241) + p = xp.asarray(rng.random(size=3)) + bracket = (-xp.asarray(5.), xp.asarray(5.)) + args = (p,) + kwargs0 = dict(args=args, xatol=0, xrtol=0, fatol=0, frtol=0) + + kwargs = kwargs0.copy() + kwargs['xatol'] = 1e-3 + res1 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(res1.xr - res1.xl, xp.full_like(p, 1e-3)) + kwargs['xatol'] = 1e-6 + res2 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(res2.xr - res2.xl, xp.full_like(p, 1e-6)) + xp_assert_less(res2.xr - res2.xl, res1.xr - res1.xl) + + kwargs = kwargs0.copy() + kwargs['xrtol'] = 1e-3 + res1 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(res1.xr - res1.xl, 1e-3 * xp.abs(res1.x)) + kwargs['xrtol'] = 1e-6 + res2 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(res2.xr - res2.xl, 1e-6 * xp.abs(res2.x)) + xp_assert_less(res2.xr - res2.xl, res1.xr - res1.xl) + + kwargs = kwargs0.copy() + kwargs['fatol'] = 1e-3 + res1 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(xp.abs(res1.fun), xp.full_like(p, 1e-3)) + kwargs['fatol'] = 1e-6 + res2 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(xp.abs(res2.fun), xp.full_like(p, 1e-6)) + xp_assert_less(xp.abs(res2.fun), xp.abs(res1.fun)) + + kwargs = kwargs0.copy() + kwargs['frtol'] = 1e-3 + x1, x2 = bracket + f0 = xp_minimum(xp.abs(self.f(x1, *args)), xp.abs(self.f(x2, *args))) + res1 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(xp.abs(res1.fun), 1e-3*f0) + kwargs['frtol'] = 1e-6 + res2 = _chandrupatla_root(self.f, *bracket, **kwargs) + xp_assert_less(xp.abs(res2.fun), 1e-6*f0) + xp_assert_less(xp.abs(res2.fun), xp.abs(res1.fun)) + + def test_maxiter_callback(self, xp): + # Test behavior of `maxiter` parameter and `callback` interface + p = xp.asarray(0.612814) + bracket = (xp.asarray(-5.), xp.asarray(5.)) + maxiter = 5 + + def f(q, p): + res = special.ndtr(q) - p + f.x = q + f.fun = res + return res + f.x = None + f.fun = None + + res = _chandrupatla_root(f, *bracket, args=(p,), maxiter=maxiter) + assert not xp.any(res.success) + assert xp.all(res.nfev == maxiter+2) + assert xp.all(res.nit == maxiter) + + def callback(res): + callback.iter += 1 + callback.res = res + assert hasattr(res, 'x') + if callback.iter == 0: + # callback is called once with initial bracket + assert (res.xl, res.xr) == bracket + else: + changed = (((res.xl == callback.xl) & (res.xr != callback.xr)) + | ((res.xl != callback.xl) & (res.xr == callback.xr))) + assert xp.all(changed) + + callback.xl = res.xl + callback.xr = res.xr + assert res.status == eim._EINPROGRESS + xp_assert_equal(self.f(res.xl, p), res.fl) + xp_assert_equal(self.f(res.xr, p), res.fr) + xp_assert_equal(self.f(res.x, p), res.fun) + if 
callback.iter == maxiter:
+                raise StopIteration
+        callback.iter = -1  # callback called once before first iteration
+        callback.res = None
+        callback.xl = None
+        callback.xr = None
+
+        res2 = _chandrupatla_root(f, *bracket, args=(p,), callback=callback)
+
+        # terminating with callback is identical to terminating due to maxiter
+        # (except for `status`)
+        for key in res.keys():
+            if key == 'status':
+                xp_assert_equal(res[key], xp.asarray(eim._ECONVERR, dtype=xp.int32))
+                xp_assert_equal(res2[key], xp.asarray(eim._ECALLBACK, dtype=xp.int32))
+            elif key.startswith('_'):
+                continue
+            else:
+                xp_assert_equal(res2[key], res[key])
+
+    @pytest.mark.parametrize('case', _CHANDRUPATLA_TESTS)
+    def test_nit_expected(self, case, xp):
+        # Test that `_chandrupatla` implements Chandrupatla's algorithm:
+        # in all 40 test cases, the number of iterations performed
+        # matches the number reported in the original paper.
+        f, bracket, root, nfeval, id = case
+        # Chandrupatla's criterion is equivalent to
+        # abs(x2-x1) < 4*abs(xmin)*xrtol + xatol, but we use the more standard
+        # abs(x2-x1) < abs(xmin)*xrtol + xatol. Therefore, set xrtol to 4x
+        # that used by Chandrupatla in tests.
+        bracket = (xp.asarray(bracket[0], dtype=xp.float64),
+                   xp.asarray(bracket[1], dtype=xp.float64))
+        root = xp.asarray(root, dtype=xp.float64)
+
+        res = _chandrupatla_root(f, *bracket, xrtol=4e-10, xatol=1e-5)
+        xp_assert_close(res.fun, xp.asarray(f(root), dtype=xp.float64),
+                        rtol=1e-8, atol=2e-3)
+        xp_assert_equal(res.nfev, xp.asarray(nfeval, dtype=xp.int32))
+
+    @pytest.mark.parametrize("root", (0.622, [0.622, 0.623]))
+    @pytest.mark.parametrize("dtype", ('float16', 'float32', 'float64'))
+    def test_dtype(self, root, dtype, xp):
+        # Test that dtypes are preserved
+        not_numpy = not is_numpy(xp)
+        if not_numpy and dtype == 'float16':
+            pytest.skip("`float16` dtype only supported for NumPy arrays.")
+
+        dtype_name = dtype
+        dtype = getattr(xp, dtype_name, None)
+        if dtype is None:
+            pytest.skip(f"{xp} does not support {dtype_name}")
+
+        def f(x, root):
+            res = (x - root) ** 3.
+            if is_numpy(xp):  # NumPy does not preserve dtype
+                return xp.asarray(res, dtype=dtype)
+            return res
+
+        a, b = xp.asarray(-3, dtype=dtype), xp.asarray(3, dtype=dtype)
+        root = xp.asarray(root, dtype=dtype)
+        res = _chandrupatla_root(f, a, b, args=(root,), xatol=1e-3)
+        try:
+            xp_assert_close(res.x, root, atol=1e-3)
+        except AssertionError:
+            assert res.x.dtype == dtype
+            assert xp.all(res.fun == 0)
+
+    def test_input_validation(self, xp):
+        # Test input validation for appropriate error messages
+
+        def func(x):
+            return x
+
+        message = '`func` must be callable.'
+        with pytest.raises(ValueError, match=message):
+            bracket = xp.asarray(-4), xp.asarray(4)
+            _chandrupatla_root(None, *bracket)
+
+        message = 'Abscissae and function output must be real numbers.'
+        with pytest.raises(ValueError, match=message):
+            bracket = xp.asarray(-4+1j), xp.asarray(4)
+            _chandrupatla_root(func, *bracket)
+
+        # raised by `np.broadcast`, but the traceback is readable IMO
+        message = "...not be broadcast..."  # all messages include this part
+        with pytest.raises((ValueError, RuntimeError), match=message):
+            bracket = xp.asarray([-2, -3]), xp.asarray([3, 4, 5])
+            _chandrupatla_root(func, *bracket)
+
+        message = "The shape of the array returned by `func`..."
+        with pytest.raises(ValueError, match=message):
+            bracket = xp.asarray([-3, -3]), xp.asarray([5, 5])
+            _chandrupatla_root(lambda x: [x[0], x[1], x[1]], *bracket)
+
+        message = 'Tolerances must be non-negative scalars.'
+ bracket = xp.asarray(-4), xp.asarray(4) + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, xatol=-1) + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, xrtol=xp.nan) + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, fatol='ekki') + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, frtol=xp.nan) + + message = '`maxiter` must be a non-negative integer.' + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, maxiter=1.5) + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, maxiter=-1) + + message = '`callback` must be callable.' + with pytest.raises(ValueError, match=message): + _chandrupatla_root(func, *bracket, callback='shrubbery') + + def test_special_cases(self, xp): + # Test edge cases and other special cases + + # Test infinite function values + def f(x): + return 1 / x + 1 - 1 / (-x + 1) + + a, b = xp.asarray([0.1, 0., 0., 0.1]), xp.asarray([0.9, 1.0, 0.9, 1.0]) + + with np.errstate(divide='ignore', invalid='ignore'): + res = _chandrupatla_root(f, a, b) + + assert xp.all(res.success) + xp_assert_close(res.x[1:], xp.full((3,), res.x[0])) + + # Test that integers are not passed to `f` + # (otherwise this would overflow) + xp_test = array_namespace(a) # need isdtype + def f(x): + assert xp_test.isdtype(x.dtype, "real floating") + # this would overflow if x were an xp integer dtype + return x ** 31 - 1 + + # note that all inputs are integer type; result is automatically default float + res = _chandrupatla_root(f, xp.asarray(-7), xp.asarray(5)) + assert res.success + xp_assert_close(res.x, xp.asarray(1.)) + + # Test that if both ends of bracket equal root, algorithm reports + # convergence. + def f(x, root): + return x**2 - root + + root = xp.asarray([0, 1]) + res = _chandrupatla_root(f, xp.asarray(1), xp.asarray(1), args=(root,)) + xp_assert_equal(res.success, xp.asarray([False, True])) + xp_assert_equal(res.x, xp.asarray([np.nan, 1.])) + + def f(x): + return 1/x + + with np.errstate(invalid='ignore'): + inf = xp.asarray(xp.inf) + res = _chandrupatla_root(f, inf, inf) + assert res.success + xp_assert_equal(res.x, xp.asarray(np.inf)) + + # Test maxiter = 0. Should do nothing to bracket. + def f(x): + return x**3 - 1 + + a, b = xp.asarray(-3.), xp.asarray(5.) 
+ res = _chandrupatla_root(f, a, b, maxiter=0) + xp_assert_equal(res.success, xp.asarray(False)) + xp_assert_equal(res.status, xp.asarray(-2, dtype=xp.int32)) + xp_assert_equal(res.nit, xp.asarray(0, dtype=xp.int32)) + xp_assert_equal(res.nfev, xp.asarray(2, dtype=xp.int32)) + xp_assert_equal(res.xl, a) + xp_assert_equal(res.xr, b) + # The `x` attribute is the one with the smaller function value + xp_assert_equal(res.x, a) + # Reverse bracket; check that this is still true + res = _chandrupatla_root(f, -b, -a, maxiter=0) + xp_assert_equal(res.x, -a) + + # Test maxiter = 1 + res = _chandrupatla_root(f, a, b, maxiter=1) + xp_assert_equal(res.success, xp.asarray(True)) + xp_assert_equal(res.status, xp.asarray(0, dtype=xp.int32)) + xp_assert_equal(res.nit, xp.asarray(1, dtype=xp.int32)) + xp_assert_equal(res.nfev, xp.asarray(3, dtype=xp.int32)) + xp_assert_close(res.x, xp.asarray(1.)) + + # Test scalar `args` (not in tuple) + def f(x, c): + return c*x - 1 + + res = _chandrupatla_root(f, xp.asarray(-1), xp.asarray(1), args=xp.asarray(3)) + xp_assert_close(res.x, xp.asarray(1/3)) + + # # TODO: Test zero tolerance + # # ~~What's going on here - why are iterations repeated?~~ + # # tl goes to zero when xatol=xrtol=0. When function is nearly linear, + # # this causes convergence issues. + # def f(x): + # return np.cos(x) + # + # res = _chandrupatla_root(f, 0, np.pi, xatol=0, xrtol=0) + # assert res.nit < 100 + # xp = np.nextafter(res.x, np.inf) + # xm = np.nextafter(res.x, -np.inf) + # assert np.abs(res.fun) < np.abs(f(xp)) + # assert np.abs(res.fun) < np.abs(f(xm)) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cobyla.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cobyla.py new file mode 100644 index 0000000000000000000000000000000000000000..11663ce778beb7e1046143b93fe2508f469727c1 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cobyla.py @@ -0,0 +1,166 @@ +import math + +import numpy as np +from numpy.testing import assert_allclose, assert_, assert_array_equal +import pytest + +from scipy.optimize import fmin_cobyla, minimize, Bounds + + +class TestCobyla: + def setup_method(self): + self.x0 = [4.95, 0.66] + self.solution = [math.sqrt(25 - (2.0/3)**2), 2.0/3] + self.opts = {'disp': False, 'rhobeg': 1, 'tol': 1e-5, + 'maxiter': 100} + + def fun(self, x): + return x[0]**2 + abs(x[1])**3 + + def con1(self, x): + return x[0]**2 + x[1]**2 - 25 + + def con2(self, x): + return -self.con1(x) + + @pytest.mark.xslow(True, reason='not slow, but noisy so only run rarely') + def test_simple(self, capfd): + # use disp=True as smoke test for gh-8118 + x = fmin_cobyla(self.fun, self.x0, [self.con1, self.con2], rhobeg=1, + rhoend=1e-5, maxfun=100, disp=True) + assert_allclose(x, self.solution, atol=1e-4) + + def test_minimize_simple(self): + class Callback: + def __init__(self): + self.n_calls = 0 + self.last_x = None + + def __call__(self, x): + self.n_calls += 1 + self.last_x = x + + callback = Callback() + + # Minimize with method='COBYLA' + cons = ({'type': 'ineq', 'fun': self.con1}, + {'type': 'ineq', 'fun': self.con2}) + sol = minimize(self.fun, self.x0, method='cobyla', constraints=cons, + callback=callback, options=self.opts) + assert_allclose(sol.x, self.solution, atol=1e-4) + assert_(sol.success, sol.message) + assert_(sol.maxcv < 1e-5, sol) + assert_(sol.nfev < 70, sol) + assert_(sol.fun < 
self.fun(self.solution) + 1e-3, sol) + assert_(sol.nfev == callback.n_calls, + "Callback is not called exactly once for every function eval.") + assert_array_equal( + sol.x, + callback.last_x, + "Last design vector sent to the callback is not equal to returned value.", + ) + + def test_minimize_constraint_violation(self): + np.random.seed(1234) + pb = np.random.rand(10, 10) + spread = np.random.rand(10) + + def p(w): + return pb.dot(w) + + def f(w): + return -(w * spread).sum() + + def c1(w): + return 500 - abs(p(w)).sum() + + def c2(w): + return 5 - abs(p(w).sum()) + + def c3(w): + return 5 - abs(p(w)).max() + + cons = ({'type': 'ineq', 'fun': c1}, + {'type': 'ineq', 'fun': c2}, + {'type': 'ineq', 'fun': c3}) + w0 = np.zeros((10,)) + sol = minimize(f, w0, method='cobyla', constraints=cons, + options={'catol': 1e-6}) + assert_(sol.maxcv > 1e-6) + assert_(not sol.success) + + +def test_vector_constraints(): + # test that fmin_cobyla and minimize can take a combination + # of constraints, some returning a number and others an array + def fun(x): + return (x[0] - 1)**2 + (x[1] - 2.5)**2 + + def fmin(x): + return fun(x) - 1 + + def cons1(x): + a = np.array([[1, -2, 2], [-1, -2, 6], [-1, 2, 2]]) + return np.array([a[i, 0] * x[0] + a[i, 1] * x[1] + + a[i, 2] for i in range(len(a))]) + + def cons2(x): + return x # identity, acts as bounds x > 0 + + x0 = np.array([2, 0]) + cons_list = [fun, cons1, cons2] + + xsol = [1.4, 1.7] + fsol = 0.8 + + # testing fmin_cobyla + sol = fmin_cobyla(fun, x0, cons_list, rhoend=1e-5) + assert_allclose(sol, xsol, atol=1e-4) + + sol = fmin_cobyla(fun, x0, fmin, rhoend=1e-5) + assert_allclose(fun(sol), 1, atol=1e-4) + + # testing minimize + constraints = [{'type': 'ineq', 'fun': cons} for cons in cons_list] + sol = minimize(fun, x0, constraints=constraints, tol=1e-5) + assert_allclose(sol.x, xsol, atol=1e-4) + assert_(sol.success, sol.message) + assert_allclose(sol.fun, fsol, atol=1e-4) + + constraints = {'type': 'ineq', 'fun': fmin} + sol = minimize(fun, x0, constraints=constraints, tol=1e-5) + assert_allclose(sol.fun, 1, atol=1e-4) + + +class TestBounds: + # Test cobyla support for bounds (only when used via `minimize`) + # Invalid bounds is tested in + # test_optimize.TestOptimizeSimple.test_minimize_invalid_bounds + + def test_basic(self): + def f(x): + return np.sum(x**2) + + lb = [-1, None, 1, None, -0.5] + ub = [-0.5, -0.5, None, None, -0.5] + bounds = [(a, b) for a, b in zip(lb, ub)] + # these are converted to Bounds internally + + res = minimize(f, x0=[1, 2, 3, 4, 5], method='cobyla', bounds=bounds) + ref = [-0.5, -0.5, 1, 0, -0.5] + assert res.success + assert_allclose(res.x, ref, atol=1e-3) + + def test_unbounded(self): + def f(x): + return np.sum(x**2) + + bounds = Bounds([-np.inf, -np.inf], [np.inf, np.inf]) + res = minimize(f, x0=[1, 2], method='cobyla', bounds=bounds) + assert res.success + assert_allclose(res.x, 0, atol=1e-3) + + bounds = Bounds([1, -np.inf], [np.inf, np.inf]) + res = minimize(f, x0=[1, 2], method='cobyla', bounds=bounds) + assert res.success + assert_allclose(res.x, [1, 0], atol=1e-3) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_constraints.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_constraints.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4186ba7b6dd6f56b89e2f39add9eb16e6beccb --- /dev/null +++ 
b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_constraints.py @@ -0,0 +1,255 @@ +import pytest +import numpy as np +from numpy.testing import TestCase, assert_array_equal +import scipy.sparse as sps +from scipy.optimize._constraints import ( + Bounds, LinearConstraint, NonlinearConstraint, PreparedConstraint, + new_bounds_to_old, old_bound_to_new, strict_bounds) + + +class TestStrictBounds(TestCase): + def test_scalarvalue_unique_enforce_feasibility(self): + m = 3 + lb = 2 + ub = 4 + enforce_feasibility = False + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [-np.inf, -np.inf, -np.inf]) + assert_array_equal(strict_ub, [np.inf, np.inf, np.inf]) + + enforce_feasibility = True + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [2, 2, 2]) + assert_array_equal(strict_ub, [4, 4, 4]) + + def test_vectorvalue_unique_enforce_feasibility(self): + m = 3 + lb = [1, 2, 3] + ub = [4, 5, 6] + enforce_feasibility = False + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [-np.inf, -np.inf, -np.inf]) + assert_array_equal(strict_ub, [np.inf, np.inf, np.inf]) + + enforce_feasibility = True + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [1, 2, 3]) + assert_array_equal(strict_ub, [4, 5, 6]) + + def test_scalarvalue_vector_enforce_feasibility(self): + m = 3 + lb = 2 + ub = 4 + enforce_feasibility = [False, True, False] + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [-np.inf, 2, -np.inf]) + assert_array_equal(strict_ub, [np.inf, 4, np.inf]) + + def test_vectorvalue_vector_enforce_feasibility(self): + m = 3 + lb = [1, 2, 3] + ub = [4, 6, np.inf] + enforce_feasibility = [True, False, True] + strict_lb, strict_ub = strict_bounds(lb, ub, + enforce_feasibility, + m) + assert_array_equal(strict_lb, [1, -np.inf, 3]) + assert_array_equal(strict_ub, [4, np.inf, np.inf]) + + +def test_prepare_constraint_infeasible_x0(): + lb = np.array([0, 20, 30]) + ub = np.array([0.5, np.inf, 70]) + x0 = np.array([1, 2, 3]) + enforce_feasibility = np.array([False, True, True], dtype=bool) + bounds = Bounds(lb, ub, enforce_feasibility) + pytest.raises(ValueError, PreparedConstraint, bounds, x0) + + pc = PreparedConstraint(Bounds(lb, ub), [1, 2, 3]) + assert (pc.violation([1, 2, 3]) > 0).any() + assert (pc.violation([0.25, 21, 31]) == 0).all() + + x0 = np.array([1, 2, 3, 4]) + A = np.array([[1, 2, 3, 4], [5, 0, 0, 6], [7, 0, 8, 0]]) + enforce_feasibility = np.array([True, True, True], dtype=bool) + linear = LinearConstraint(A, -np.inf, 0, enforce_feasibility) + pytest.raises(ValueError, PreparedConstraint, linear, x0) + + pc = PreparedConstraint(LinearConstraint(A, -np.inf, 0), + [1, 2, 3, 4]) + assert (pc.violation([1, 2, 3, 4]) > 0).any() + assert (pc.violation([-10, 2, -10, 4]) == 0).all() + + def fun(x): + return A.dot(x) + + def jac(x): + return A + + def hess(x, v): + return sps.csr_matrix((4, 4)) + + nonlinear = NonlinearConstraint(fun, -np.inf, 0, jac, hess, + enforce_feasibility) + pytest.raises(ValueError, PreparedConstraint, nonlinear, x0) + + pc = PreparedConstraint(nonlinear, [-10, 2, -10, 4]) + assert (pc.violation([1, 2, 3, 4]) > 0).any() + assert (pc.violation([-10, 2, -10, 4]) == 0).all() + + +def test_violation(): + def cons_f(x): + return np.array([x[0] ** 2 + x[1], x[0] ** 2 - 
x[1]]) + + nlc = NonlinearConstraint(cons_f, [-1, -0.8500], [2, 2]) + pc = PreparedConstraint(nlc, [0.5, 1]) + + assert_array_equal(pc.violation([0.5, 1]), [0., 0.]) + + np.testing.assert_almost_equal(pc.violation([0.5, 1.2]), [0., 0.1]) + + np.testing.assert_almost_equal(pc.violation([1.2, 1.2]), [0.64, 0]) + + np.testing.assert_almost_equal(pc.violation([0.1, -1.2]), [0.19, 0]) + + np.testing.assert_almost_equal(pc.violation([0.1, 2]), [0.01, 1.14]) + + +def test_new_bounds_to_old(): + lb = np.array([-np.inf, 2, 3]) + ub = np.array([3, np.inf, 10]) + + bounds = [(None, 3), (2, None), (3, 10)] + assert_array_equal(new_bounds_to_old(lb, ub, 3), bounds) + + bounds_single_lb = [(-1, 3), (-1, None), (-1, 10)] + assert_array_equal(new_bounds_to_old(-1, ub, 3), bounds_single_lb) + + bounds_no_lb = [(None, 3), (None, None), (None, 10)] + assert_array_equal(new_bounds_to_old(-np.inf, ub, 3), bounds_no_lb) + + bounds_single_ub = [(None, 20), (2, 20), (3, 20)] + assert_array_equal(new_bounds_to_old(lb, 20, 3), bounds_single_ub) + + bounds_no_ub = [(None, None), (2, None), (3, None)] + assert_array_equal(new_bounds_to_old(lb, np.inf, 3), bounds_no_ub) + + bounds_single_both = [(1, 2), (1, 2), (1, 2)] + assert_array_equal(new_bounds_to_old(1, 2, 3), bounds_single_both) + + bounds_no_both = [(None, None), (None, None), (None, None)] + assert_array_equal(new_bounds_to_old(-np.inf, np.inf, 3), bounds_no_both) + + +def test_old_bounds_to_new(): + bounds = ([1, 2], (None, 3), (-1, None)) + lb_true = np.array([1, -np.inf, -1]) + ub_true = np.array([2, 3, np.inf]) + + lb, ub = old_bound_to_new(bounds) + assert_array_equal(lb, lb_true) + assert_array_equal(ub, ub_true) + + bounds = [(-np.inf, np.inf), (np.array([1]), np.array([1]))] + lb, ub = old_bound_to_new(bounds) + + assert_array_equal(lb, [-np.inf, 1]) + assert_array_equal(ub, [np.inf, 1]) + + +class TestBounds: + def test_repr(self): + # so that eval works + from numpy import array, inf # noqa: F401 + for args in ( + (-1.0, 5.0), + (-1.0, np.inf, True), + (np.array([1.0, -np.inf]), np.array([2.0, np.inf])), + (np.array([1.0, -np.inf]), np.array([2.0, np.inf]), + np.array([True, False])), + ): + bounds = Bounds(*args) + bounds2 = eval(repr(Bounds(*args))) + assert_array_equal(bounds.lb, bounds2.lb) + assert_array_equal(bounds.ub, bounds2.ub) + assert_array_equal(bounds.keep_feasible, bounds2.keep_feasible) + + def test_array(self): + # gh13501 + b = Bounds(lb=[0.0, 0.0], ub=[1.0, 1.0]) + assert isinstance(b.lb, np.ndarray) + assert isinstance(b.ub, np.ndarray) + + def test_defaults(self): + b1 = Bounds() + b2 = Bounds(np.asarray(-np.inf), np.asarray(np.inf)) + assert b1.lb == b2.lb + assert b1.ub == b2.ub + + def test_input_validation(self): + message = "Lower and upper bounds must be dense arrays." + with pytest.raises(ValueError, match=message): + Bounds(sps.coo_array([1, 2]), [1, 2]) + with pytest.raises(ValueError, match=message): + Bounds([1, 2], sps.coo_array([1, 2])) + + message = "`keep_feasible` must be a dense array." + with pytest.raises(ValueError, match=message): + Bounds([1, 2], [1, 2], keep_feasible=sps.coo_array([True, True])) + + message = "`lb`, `ub`, and `keep_feasible` must be broadcastable." 
+        with pytest.raises(ValueError, match=message):
+            Bounds([1, 2], [1, 2, 3])
+
+    def test_residual(self):
+        bounds = Bounds(-2, 4)
+        x0 = [-1, 2]
+        np.testing.assert_allclose(bounds.residual(x0), ([1, 4], [5, 2]))
+
+
+class TestLinearConstraint:
+    def test_defaults(self):
+        A = np.eye(4)
+        lc = LinearConstraint(A)
+        lc2 = LinearConstraint(A, -np.inf, np.inf)
+        assert_array_equal(lc.lb, lc2.lb)
+        assert_array_equal(lc.ub, lc2.ub)
+
+    def test_input_validation(self):
+        A = np.eye(4)
+        message = "`lb`, `ub`, and `keep_feasible` must be broadcastable"
+        with pytest.raises(ValueError, match=message):
+            LinearConstraint(A, [1, 2], [1, 2, 3])
+
+        message = "Constraint limits must be dense arrays"
+        with pytest.raises(ValueError, match=message):
+            LinearConstraint(A, sps.coo_array([1, 2]), [2, 3])
+        with pytest.raises(ValueError, match=message):
+            LinearConstraint(A, [1, 2], sps.coo_array([2, 3]))
+
+        message = "`keep_feasible` must be a dense array"
+        with pytest.raises(ValueError, match=message):
+            keep_feasible = sps.coo_array([True, True])
+            LinearConstraint(A, [1, 2], [2, 3], keep_feasible=keep_feasible)
+
+        A = np.empty((4, 3, 5))
+        message = "`A` must have exactly two dimensions."
+        with pytest.raises(ValueError, match=message):
+            LinearConstraint(A)
+
+    def test_residual(self):
+        A = np.eye(2)
+        lc = LinearConstraint(A, -2, 4)
+        x0 = [-1, 2]
+        np.testing.assert_allclose(lc.residual(x0), ([1, 4], [5, 2]))
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cython_optimize.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cython_optimize.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f859c1143eb6b63c439fe278bfdd4fdaa15410f
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_cython_optimize.py
@@ -0,0 +1,92 @@
+"""
+Test Cython optimize zeros API functions: ``bisect``, ``ridder``, ``brenth``,
+and ``brentq`` in `scipy.optimize.cython_optimize`, by finding the roots of a
+3rd order polynomial given a sequence of constant terms, ``a0``, and fixed 1st,
+2nd, and 3rd order terms in ``args``.
+
+.. math::
+
+    f(x, a0, args) = ((args[2]*x + args[1])*x + args[0])*x + a0
+
+The 3rd order polynomial function is written in Cython and called in a Python
+wrapper named after the zero function. See the private ``_zeros`` Cython module
+in `scipy.optimize.cython_optimize` for more information.
+"""
+
+import numpy.testing as npt
+from scipy.optimize.cython_optimize import _zeros
+
+# CONSTANTS
+# Solve x**3 - A0 = 0 for A0 = [2.0, 2.1, ..., 2.9].
+# The ARGS have 3 elements just to show how this could be done for any cubic
+# polynomial.
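+# With ARGS = (0.0, 0.0, 1.0) the cubic reduces to f(x) = x**3 + a0, so each
+# expected root is (-a0)**(1/3); see EXPECTED below.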
+A0 = tuple(-2.0 - x/10.0 for x in range(10)) # constant term +ARGS = (0.0, 0.0, 1.0) # 1st, 2nd, and 3rd order terms +XLO, XHI = 0.0, 2.0 # first and second bounds of zeros functions +# absolute and relative tolerances and max iterations for zeros functions +XTOL, RTOL, MITR = 0.001, 0.001, 10 +EXPECTED = [(-a0) ** (1.0/3.0) for a0 in A0] +# = [1.2599210498948732, +# 1.2805791649874942, +# 1.300591446851387, +# 1.3200061217959123, +# 1.338865900164339, +# 1.3572088082974532, +# 1.375068867074141, +# 1.3924766500838337, +# 1.4094597464129783, +# 1.4260431471424087] + + +# test bisect +def test_bisect(): + npt.assert_allclose( + EXPECTED, + list( + _zeros.loop_example('bisect', A0, ARGS, XLO, XHI, XTOL, RTOL, MITR) + ), + rtol=RTOL, atol=XTOL + ) + + +# test ridder +def test_ridder(): + npt.assert_allclose( + EXPECTED, + list( + _zeros.loop_example('ridder', A0, ARGS, XLO, XHI, XTOL, RTOL, MITR) + ), + rtol=RTOL, atol=XTOL + ) + + +# test brenth +def test_brenth(): + npt.assert_allclose( + EXPECTED, + list( + _zeros.loop_example('brenth', A0, ARGS, XLO, XHI, XTOL, RTOL, MITR) + ), + rtol=RTOL, atol=XTOL + ) + + +# test brentq +def test_brentq(): + npt.assert_allclose( + EXPECTED, + list( + _zeros.loop_example('brentq', A0, ARGS, XLO, XHI, XTOL, RTOL, MITR) + ), + rtol=RTOL, atol=XTOL + ) + + +# test brentq with full output +def test_brentq_full_output(): + output = _zeros.full_output_example( + (A0[0],) + ARGS, XLO, XHI, XTOL, RTOL, MITR) + npt.assert_allclose(EXPECTED[0], output['root'], rtol=RTOL, atol=XTOL) + npt.assert_equal(6, output['iterations']) + npt.assert_equal(7, output['funcalls']) + npt.assert_equal(0, output['error_num']) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiable_functions.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiable_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..7a329135f91e801847ee1ee4073e1701acf16c82 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiable_functions.py @@ -0,0 +1,803 @@ +import pytest +import platform +import numpy as np +from numpy.testing import (TestCase, assert_array_almost_equal, + assert_array_equal, assert_, assert_allclose, + assert_equal) +from scipy._lib._gcutils import assert_deallocated +from scipy.sparse import csr_matrix +from scipy.sparse.linalg import LinearOperator +from scipy.optimize._differentiable_functions import (ScalarFunction, + VectorFunction, + LinearVectorFunction, + IdentityVectorFunction) +from scipy.optimize import rosen, rosen_der, rosen_hess +from scipy.optimize._hessian_update_strategy import BFGS + + +class ExScalarFunction: + + def __init__(self): + self.nfev = 0 + self.ngev = 0 + self.nhev = 0 + + def fun(self, x): + self.nfev += 1 + return 2*(x[0]**2 + x[1]**2 - 1) - x[0] + + def grad(self, x): + self.ngev += 1 + return np.array([4*x[0]-1, 4*x[1]]) + + def hess(self, x): + self.nhev += 1 + return 4*np.eye(2) + + +class TestScalarFunction(TestCase): + + def test_finite_difference_grad(self): + ex = ExScalarFunction() + nfev = 0 + ngev = 0 + + x0 = [1.0, 0.0] + analit = ScalarFunction(ex.fun, x0, (), ex.grad, + ex.hess, None, (-np.inf, np.inf)) + nfev += 1 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev, nfev) + approx = 
ScalarFunction(ex.fun, x0, (), '2-point', + ex.hess, None, (-np.inf, np.inf)) + nfev += 3 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(analit.f, approx.f) + assert_array_almost_equal(analit.g, approx.g) + + x = [10, 0.3] + f_analit = analit.fun(x) + g_analit = analit.grad(x) + nfev += 1 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + f_approx = approx.fun(x) + g_approx = approx.grad(x) + nfev += 3 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(g_analit, g_approx) + + x = [2.0, 1.0] + g_analit = analit.grad(x) + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + + g_approx = approx.grad(x) + nfev += 3 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_almost_equal(g_analit, g_approx) + + x = [2.5, 0.3] + f_analit = analit.fun(x) + g_analit = analit.grad(x) + nfev += 1 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + f_approx = approx.fun(x) + g_approx = approx.grad(x) + nfev += 3 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(g_analit, g_approx) + + x = [2, 0.3] + f_analit = analit.fun(x) + g_analit = analit.grad(x) + nfev += 1 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + f_approx = approx.fun(x) + g_approx = approx.grad(x) + nfev += 3 + ngev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(g_analit, g_approx) + + def test_fun_and_grad(self): + ex = ExScalarFunction() + + def fg_allclose(x, y): + assert_allclose(x[0], y[0]) + assert_allclose(x[1], y[1]) + + # with analytic gradient + x0 = [2.0, 0.3] + analit = ScalarFunction(ex.fun, x0, (), ex.grad, + ex.hess, None, (-np.inf, np.inf)) + + fg = ex.fun(x0), ex.grad(x0) + fg_allclose(analit.fun_and_grad(x0), fg) + assert analit.ngev == 1 + + x0[1] = 1. + fg = ex.fun(x0), ex.grad(x0) + fg_allclose(analit.fun_and_grad(x0), fg) + + # with finite difference gradient + x0 = [2.0, 0.3] + sf = ScalarFunction(ex.fun, x0, (), '3-point', + ex.hess, None, (-np.inf, np.inf)) + assert sf.ngev == 1 + fg = ex.fun(x0), ex.grad(x0) + fg_allclose(sf.fun_and_grad(x0), fg) + assert sf.ngev == 1 + + x0[1] = 1. 
+ fg = ex.fun(x0), ex.grad(x0) + fg_allclose(sf.fun_and_grad(x0), fg) + + def test_finite_difference_hess_linear_operator(self): + ex = ExScalarFunction() + nfev = 0 + ngev = 0 + nhev = 0 + + x0 = [1.0, 0.0] + analit = ScalarFunction(ex.fun, x0, (), ex.grad, + ex.hess, None, (-np.inf, np.inf)) + nfev += 1 + ngev += 1 + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev, nhev) + approx = ScalarFunction(ex.fun, x0, (), ex.grad, + '2-point', None, (-np.inf, np.inf)) + assert_(isinstance(approx.H, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_equal(analit.f, approx.f) + assert_array_almost_equal(analit.g, approx.g) + assert_array_almost_equal(analit.H.dot(v), approx.H.dot(v)) + nfev += 1 + ngev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.0, 1.0] + H_analit = analit.hess(x) + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + H_approx = approx.hess(x) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v)) + ngev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.1, 1.2] + H_analit = analit.hess(x) + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + H_approx = approx.hess(x) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v)) + ngev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.5, 0.3] + _ = analit.grad(x) + H_analit = analit.hess(x) + ngev += 1 + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + _ = approx.grad(x) + H_approx = approx.hess(x) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v)) + ngev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.ngev, ngev) + assert_array_equal(analit.ngev+approx.ngev, ngev) + assert_array_equal(ex.nhev, nhev) + 
assert_array_equal(analit.nhev+approx.nhev, nhev)
+
+        x = [5.2, 2.3]
+        _ = analit.grad(x)
+        H_analit = analit.hess(x)
+        ngev += 1
+        nhev += 1
+        assert_array_equal(ex.nfev, nfev)
+        assert_array_equal(analit.nfev+approx.nfev, nfev)
+        assert_array_equal(ex.ngev, ngev)
+        assert_array_equal(analit.ngev+approx.ngev, ngev)
+        assert_array_equal(ex.nhev, nhev)
+        assert_array_equal(analit.nhev+approx.nhev, nhev)
+        _ = approx.grad(x)
+        H_approx = approx.hess(x)
+        assert_(isinstance(H_approx, LinearOperator))
+        for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]):
+            assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v))
+        ngev += 4
+        assert_array_equal(ex.nfev, nfev)
+        assert_array_equal(analit.nfev+approx.nfev, nfev)
+        assert_array_equal(ex.ngev, ngev)
+        assert_array_equal(analit.ngev+approx.ngev, ngev)
+        assert_array_equal(ex.nhev, nhev)
+        assert_array_equal(analit.nhev+approx.nhev, nhev)
+
+    def test_x_storage_overlap(self):
+        # ScalarFunction should not store references to arrays, it should
+        # store copies - this checks that updating an array in-place causes
+        # ScalarFunction.x to be updated.
+
+        def f(x):
+            return np.sum(np.asarray(x) ** 2)
+
+        x = np.array([1., 2., 3.])
+        sf = ScalarFunction(f, x, (), '3-point', lambda x: x, None, (-np.inf, np.inf))
+
+        assert x is not sf.x
+        assert_equal(sf.fun(x), 14.0)
+        assert x is not sf.x
+
+        x[0] = 0.
+        f1 = sf.fun(x)
+        assert_equal(f1, 13.0)
+
+        x[0] = 1
+        f2 = sf.fun(x)
+        assert_equal(f2, 14.0)
+        assert x is not sf.x
+
+        # now test with a HessianUpdate strategy specified
+        hess = BFGS()
+        x = np.array([1., 2., 3.])
+        sf = ScalarFunction(f, x, (), '3-point', hess, None, (-np.inf, np.inf))
+
+        assert x is not sf.x
+        assert_equal(sf.fun(x), 14.0)
+        assert x is not sf.x
+
+        x[0] = 0.
+        f1 = sf.fun(x)
+        assert_equal(f1, 13.0)
+
+        x[0] = 1
+        f2 = sf.fun(x)
+        assert_equal(f2, 14.0)
+        assert x is not sf.x
+
+        # gh13740 x is changed in user function
+        def ff(x):
+            x *= x  # overwrite x
+            return np.sum(x)
+
+        x = np.array([1., 2., 3.])
+        sf = ScalarFunction(
+            ff, x, (), '3-point', lambda x: x, None, (-np.inf, np.inf)
+        )
+        assert x is not sf.x
+        assert_equal(sf.fun(x), 14.0)
+        assert_equal(sf.x, np.array([1., 2., 3.]))
+        assert x is not sf.x
+
+    def test_lowest_x(self):
+        # ScalarFunction should remember the lowest func(x) visited.
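+        # The tracked values are exposed as the private attributes
+        # `_lowest_f` and `_lowest_x`, asserted on directly below.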
+ x0 = np.array([2, 3, 4]) + sf = ScalarFunction(rosen, x0, (), rosen_der, rosen_hess, + None, None) + sf.fun([1, 1, 1]) + sf.fun(x0) + sf.fun([1.01, 1, 1.0]) + sf.grad([1.01, 1, 1.0]) + assert_equal(sf._lowest_f, 0.0) + assert_equal(sf._lowest_x, [1.0, 1.0, 1.0]) + + sf = ScalarFunction(rosen, x0, (), '2-point', rosen_hess, + None, (-np.inf, np.inf)) + sf.fun([1, 1, 1]) + sf.fun(x0) + sf.fun([1.01, 1, 1.0]) + sf.grad([1.01, 1, 1.0]) + assert_equal(sf._lowest_f, 0.0) + assert_equal(sf._lowest_x, [1.0, 1.0, 1.0]) + + def test_float_size(self): + x0 = np.array([2, 3, 4]).astype(np.float32) + + # check that ScalarFunction/approx_derivative always send the correct + # float width + def rosen_(x): + assert x.dtype == np.float32 + return rosen(x) + + sf = ScalarFunction(rosen_, x0, (), '2-point', rosen_hess, + None, (-np.inf, np.inf)) + res = sf.fun(x0) + assert res.dtype == np.float32 + + +class ExVectorialFunction: + + def __init__(self): + self.nfev = 0 + self.njev = 0 + self.nhev = 0 + + def fun(self, x): + self.nfev += 1 + return np.array([2*(x[0]**2 + x[1]**2 - 1) - x[0], + 4*(x[0]**3 + x[1]**2 - 4) - 3*x[0]], dtype=x.dtype) + + def jac(self, x): + self.njev += 1 + return np.array([[4*x[0]-1, 4*x[1]], + [12*x[0]**2-3, 8*x[1]]], dtype=x.dtype) + + def hess(self, x, v): + self.nhev += 1 + return v[0]*4*np.eye(2) + v[1]*np.array([[24*x[0], 0], + [0, 8]]) + + +class TestVectorialFunction(TestCase): + + def test_finite_difference_jac(self): + ex = ExVectorialFunction() + nfev = 0 + njev = 0 + + x0 = [1.0, 0.0] + analit = VectorFunction(ex.fun, x0, ex.jac, ex.hess, None, None, + (-np.inf, np.inf), None) + nfev += 1 + njev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev, njev) + approx = VectorFunction(ex.fun, x0, '2-point', ex.hess, None, None, + (-np.inf, np.inf), None) + nfev += 3 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(analit.f, approx.f) + assert_array_almost_equal(analit.J, approx.J) + + x = [10, 0.3] + f_analit = analit.fun(x) + J_analit = analit.jac(x) + nfev += 1 + njev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + f_approx = approx.fun(x) + J_approx = approx.jac(x) + nfev += 3 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(J_analit, J_approx, decimal=4) + + x = [2.0, 1.0] + J_analit = analit.jac(x) + njev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + J_approx = approx.jac(x) + nfev += 3 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_almost_equal(J_analit, J_approx) + + x = [2.5, 0.3] + f_analit = analit.fun(x) + J_analit = analit.jac(x) + nfev += 1 + njev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + 
assert_array_equal(analit.njev+approx.njev, njev) + f_approx = approx.fun(x) + J_approx = approx.jac(x) + nfev += 3 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(J_analit, J_approx) + + x = [2, 0.3] + f_analit = analit.fun(x) + J_analit = analit.jac(x) + nfev += 1 + njev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + f_approx = approx.fun(x) + J_approx = approx.jac(x) + nfev += 3 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_almost_equal(f_analit, f_approx) + assert_array_almost_equal(J_analit, J_approx) + + def test_finite_difference_hess_linear_operator(self): + ex = ExVectorialFunction() + nfev = 0 + njev = 0 + nhev = 0 + + x0 = [1.0, 0.0] + v0 = [1.0, 2.0] + analit = VectorFunction(ex.fun, x0, ex.jac, ex.hess, None, None, + (-np.inf, np.inf), None) + nfev += 1 + njev += 1 + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev, nhev) + approx = VectorFunction(ex.fun, x0, ex.jac, '2-point', None, None, + (-np.inf, np.inf), None) + assert_(isinstance(approx.H, LinearOperator)) + for p in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_equal(analit.f, approx.f) + assert_array_almost_equal(analit.J, approx.J) + assert_array_almost_equal(analit.H.dot(p), approx.H.dot(p)) + nfev += 1 + njev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.0, 1.0] + H_analit = analit.hess(x, v0) + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + H_approx = approx.hess(x, v0) + assert_(isinstance(H_approx, LinearOperator)) + for p in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(p), H_approx.dot(p), + decimal=5) + njev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.1, 1.2] + v = [1.0, 1.0] + H_analit = analit.hess(x, v) + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + H_approx = approx.hess(x, v) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v)) + njev += 4 + assert_array_equal(ex.nfev, nfev) + 
assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [2.5, 0.3] + _ = analit.jac(x) + H_analit = analit.hess(x, v0) + njev += 1 + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + _ = approx.jac(x) + H_approx = approx.hess(x, v0) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v), decimal=4) + njev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + x = [5.2, 2.3] + v = [2.3, 5.2] + _ = analit.jac(x) + H_analit = analit.hess(x, v) + njev += 1 + nhev += 1 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + _ = approx.jac(x) + H_approx = approx.hess(x, v) + assert_(isinstance(H_approx, LinearOperator)) + for v in ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0]): + assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v), decimal=4) + njev += 4 + assert_array_equal(ex.nfev, nfev) + assert_array_equal(analit.nfev+approx.nfev, nfev) + assert_array_equal(ex.njev, njev) + assert_array_equal(analit.njev+approx.njev, njev) + assert_array_equal(ex.nhev, nhev) + assert_array_equal(analit.nhev+approx.nhev, nhev) + + def test_x_storage_overlap(self): + # VectorFunction should not store references to arrays, it should + # store copies - this checks that an in-place update of an array is + # picked up on re-evaluation, while VectorFunction.x remains a copy. + ex = ExVectorialFunction() + x0 = np.array([1.0, 0.0]) + + vf = VectorFunction(ex.fun, x0, '3-point', ex.hess, None, None, + (-np.inf, np.inf), None) + + assert x0 is not vf.x + assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + x0[0] = 2. + assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + x0[0] = 1. + assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + # now test with a HessianUpdate strategy specified + hess = BFGS() + x0 = np.array([1.0, 0.0]) + vf = VectorFunction(ex.fun, x0, '3-point', hess, None, None, + (-np.inf, np.inf), None) + + with pytest.warns(UserWarning): + # filter the UserWarning suggesting the function may be linear, + # raised because a quasi-Newton approximation is used for the + # Hessian. + assert x0 is not vf.x + assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + x0[0] = 2. + assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + x0[0] = 1.
+ assert_equal(vf.fun(x0), ex.fun(x0)) + assert x0 is not vf.x + + def test_float_size(self): + ex = ExVectorialFunction() + x0 = np.array([1.0, 0.0]).astype(np.float32) + + vf = VectorFunction(ex.fun, x0, ex.jac, ex.hess, None, None, + (-np.inf, np.inf), None) + + res = vf.fun(x0) + assert res.dtype == np.float32 + + res = vf.jac(x0) + assert res.dtype == np.float32 + + +def test_LinearVectorFunction(): + A_dense = np.array([ + [-1, 2, 0], + [0, 4, 2] + ]) + x0 = np.zeros(3) + A_sparse = csr_matrix(A_dense) + x = np.array([1, -1, 0]) + v = np.array([-1, 1]) + Ax = np.array([-3, -4]) + + f1 = LinearVectorFunction(A_dense, x0, None) + assert_(not f1.sparse_jacobian) + + f2 = LinearVectorFunction(A_dense, x0, True) + assert_(f2.sparse_jacobian) + + f3 = LinearVectorFunction(A_dense, x0, False) + assert_(not f3.sparse_jacobian) + + f4 = LinearVectorFunction(A_sparse, x0, None) + assert_(f4.sparse_jacobian) + + f5 = LinearVectorFunction(A_sparse, x0, True) + assert_(f5.sparse_jacobian) + + f6 = LinearVectorFunction(A_sparse, x0, False) + assert_(not f6.sparse_jacobian) + + assert_array_equal(f1.fun(x), Ax) + assert_array_equal(f2.fun(x), Ax) + assert_array_equal(f1.jac(x), A_dense) + assert_array_equal(f2.jac(x).toarray(), A_sparse.toarray()) + assert_array_equal(f1.hess(x, v).toarray(), np.zeros((3, 3))) + + +def test_LinearVectorFunction_memoization(): + A = np.array([[-1, 2, 0], [0, 4, 2]]) + x0 = np.array([1, 2, -1]) + fun = LinearVectorFunction(A, x0, False) + + assert_array_equal(x0, fun.x) + assert_array_equal(A.dot(x0), fun.f) + + x1 = np.array([-1, 3, 10]) + assert_array_equal(A, fun.jac(x1)) + assert_array_equal(x1, fun.x) + assert_array_equal(A.dot(x0), fun.f) + assert_array_equal(A.dot(x1), fun.fun(x1)) + assert_array_equal(A.dot(x1), fun.f) + + +def test_IdentityVectorFunction(): + x0 = np.zeros(3) + + f1 = IdentityVectorFunction(x0, None) + f2 = IdentityVectorFunction(x0, False) + f3 = IdentityVectorFunction(x0, True) + + assert_(f1.sparse_jacobian) + assert_(not f2.sparse_jacobian) + assert_(f3.sparse_jacobian) + + x = np.array([-1, 2, 1]) + v = np.array([-2, 3, 0]) + + assert_array_equal(f1.fun(x), x) + assert_array_equal(f2.fun(x), x) + + assert_array_equal(f1.jac(x).toarray(), np.eye(3)) + assert_array_equal(f2.jac(x), np.eye(3)) + + assert_array_equal(f1.hess(x, v).toarray(), np.zeros((3, 3))) + + +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", + reason="assert_deallocate not available on PyPy" +) +def test_ScalarFunctionNoReferenceCycle(): + """Regression test for gh-20768.""" + ex = ExScalarFunction() + x0 = np.zeros(3) + with assert_deallocated(lambda: ScalarFunction(ex.fun, x0, (), ex.grad, + ex.hess, None, (-np.inf, np.inf))): + pass + + +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", + reason="assert_deallocate not available on PyPy" +) +@pytest.mark.xfail(reason="TODO remove reference cycle from VectorFunction") +def test_VectorFunctionNoReferenceCycle(): + """Regression test for gh-20768.""" + ex = ExVectorialFunction() + x0 = [1.0, 0.0] + with assert_deallocated(lambda: VectorFunction(ex.fun, x0, ex.jac, + ex.hess, None, None, (-np.inf, np.inf), None)): + pass + + +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", + reason="assert_deallocate not available on PyPy" +) +def test_LinearVectorFunctionNoReferenceCycle(): + """Regression test for gh-20768.""" + A_dense = np.array([ + [-1, 2, 0], + [0, 4, 2] + ]) + x0 = np.zeros(3) + A_sparse = csr_matrix(A_dense) + with assert_deallocated(lambda: 
LinearVectorFunction(A_sparse, x0, None)): + pass diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiate.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiate.py new file mode 100644 index 0000000000000000000000000000000000000000..195fec2f180a07c92e639a4e14d7a8b781cfa1ef --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_differentiate.py @@ -0,0 +1,512 @@ +import pytest + +import numpy as np +from numpy.testing import assert_array_less, assert_allclose, assert_equal + +import scipy._lib._elementwise_iterative_method as eim +from scipy import stats, optimize +from scipy.optimize._differentiate import (_differentiate as differentiate, + _jacobian as jacobian, _EERRORINCREASE) + +class TestDifferentiate: + + def f(self, x): + return stats.norm().cdf(x) + + @pytest.mark.parametrize('x', [0.6, np.linspace(-0.05, 1.05, 10)]) + def test_basic(self, x): + # Differentiate distribution CDF and compare against distribution `pdf` + res = differentiate(self.f, x) + ref = stats.norm().pdf(x) + np.testing.assert_allclose(res.df, ref) + # This would be nice, but doesn't always work out. `error` is an + # estimate, not a bound. + assert_array_less(abs(res.df - ref), res.error) + assert res.x.shape == ref.shape + + @pytest.mark.parametrize('case', stats._distr_params.distcont) + def test_accuracy(self, case): + distname, params = case + dist = getattr(stats, distname)(*params) + x = dist.median() + 0.1 + res = differentiate(dist.cdf, x) + ref = dist.pdf(x) + assert_allclose(res.df, ref, atol=1e-10) + + @pytest.mark.parametrize('order', [1, 6]) + @pytest.mark.parametrize('shape', [tuple(), (12,), (3, 4), (3, 2, 2)]) + def test_vectorization(self, order, shape): + # Test for correct functionality, output shapes, and dtypes for various + # input shapes.
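+ # Strategy: compare one vectorized call against element-wise calls made + # through `np.vectorize`; every field of the result must agree in value + # and shape.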
+ x = np.linspace(-0.05, 1.05, 12).reshape(shape) if shape else 0.6 + n = np.size(x) + + @np.vectorize + def _differentiate_single(x): + return differentiate(self.f, x, order=order) + + def f(x, *args, **kwargs): + f.nit += 1 + f.feval += 1 if (x.size == n or x.ndim <=1) else x.shape[-1] + return self.f(x, *args, **kwargs) + f.nit = -1 + f.feval = 0 + + res = differentiate(f, x, order=order) + refs = _differentiate_single(x).ravel() + + ref_x = [ref.x for ref in refs] + assert_allclose(res.x.ravel(), ref_x) + assert_equal(res.x.shape, shape) + + ref_df = [ref.df for ref in refs] + assert_allclose(res.df.ravel(), ref_df) + assert_equal(res.df.shape, shape) + + ref_error = [ref.error for ref in refs] + assert_allclose(res.error.ravel(), ref_error, atol=1e-12) + assert_equal(res.error.shape, shape) + + ref_success = [ref.success for ref in refs] + assert_equal(res.success.ravel(), ref_success) + assert_equal(res.success.shape, shape) + assert np.issubdtype(res.success.dtype, np.bool_) + + ref_flag = [ref.status for ref in refs] + assert_equal(res.status.ravel(), ref_flag) + assert_equal(res.status.shape, shape) + assert np.issubdtype(res.status.dtype, np.integer) + + ref_nfev = [ref.nfev for ref in refs] + assert_equal(res.nfev.ravel(), ref_nfev) + assert_equal(np.max(res.nfev), f.feval) + assert_equal(res.nfev.shape, res.x.shape) + assert np.issubdtype(res.nfev.dtype, np.integer) + + ref_nit = [ref.nit for ref in refs] + assert_equal(res.nit.ravel(), ref_nit) + assert_equal(np.max(res.nit), f.nit) + assert_equal(res.nit.shape, res.x.shape) + assert np.issubdtype(res.nit.dtype, np.integer) + + def test_flags(self): + # Test cases that should produce different status flags; show that all + # can be produced simultaneously. + rng = np.random.default_rng(5651219684984213) + def f(xs, js): + f.nit += 1 + funcs = [lambda x: x - 2.5, # converges + lambda x: np.exp(x)*rng.random(), # error increases + lambda x: np.exp(x), # reaches maxiter due to order=2 + lambda x: np.full_like(x, np.nan)[()]] # stops due to NaN + res = [funcs[j](x) for x, j in zip(xs, js.ravel())] + return res + f.nit = 0 + + args = (np.arange(4, dtype=np.int64),) + res = differentiate(f, [1]*4, rtol=1e-14, order=2, args=args) + + ref_flags = np.array([eim._ECONVERGED, + _EERRORINCREASE, + eim._ECONVERR, + eim._EVALUEERR]) + assert_equal(res.status, ref_flags) + + def test_flags_preserve_shape(self): + # Same test as above but using `preserve_shape` option to simplify. 
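+ # With `preserve_shape=True` the callable may return all four test + # functions evaluated at a scalar input, so the `args`-based index + # bookkeeping of the previous test is unnecessary.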
+ rng = np.random.default_rng(5651219684984213) + def f(x): + return [x - 2.5, # converges + np.exp(x)*rng.random(), # error increases + np.exp(x), # reaches maxiter due to order=2 + np.full_like(x, np.nan)[()]] # stops due to NaN + + res = differentiate(f, 1, rtol=1e-14, order=2, preserve_shape=True) + + ref_flags = np.array([eim._ECONVERGED, + _EERRORINCREASE, + eim._ECONVERR, + eim._EVALUEERR]) + assert_equal(res.status, ref_flags) + + def test_preserve_shape(self): + # Test `preserve_shape` option + def f(x): + return [x, np.sin(3*x), x+np.sin(10*x), np.sin(20*x)*(x-1)**2] + + x = 0 + ref = [1, 3*np.cos(3*x), 1+10*np.cos(10*x), + 20*np.cos(20*x)*(x-1)**2 + 2*np.sin(20*x)*(x-1)] + res = differentiate(f, x, preserve_shape=True) + assert_allclose(res.df, ref) + + def test_convergence(self): + # Test that the convergence tolerances behave as expected + dist = stats.norm() + x = 1 + f = dist.cdf + ref = dist.pdf(x) + kwargs0 = dict(atol=0, rtol=0, order=4) + + kwargs = kwargs0.copy() + kwargs['atol'] = 1e-3 + res1 = differentiate(f, x, **kwargs) + assert_array_less(abs(res1.df - ref), 1e-3) + kwargs['atol'] = 1e-6 + res2 = differentiate(f, x, **kwargs) + assert_array_less(abs(res2.df - ref), 1e-6) + assert_array_less(abs(res2.df - ref), abs(res1.df - ref)) + + kwargs = kwargs0.copy() + kwargs['rtol'] = 1e-3 + res1 = differentiate(f, x, **kwargs) + assert_array_less(abs(res1.df - ref), 1e-3 * np.abs(ref)) + kwargs['rtol'] = 1e-6 + res2 = differentiate(f, x, **kwargs) + assert_array_less(abs(res2.df - ref), 1e-6 * np.abs(ref)) + assert_array_less(abs(res2.df - ref), abs(res1.df - ref)) + + def test_step_parameters(self): + # Test that step factors have the expected effect on accuracy + dist = stats.norm() + x = 1 + f = dist.cdf + ref = dist.pdf(x) + + res1 = differentiate(f, x, initial_step=0.5, maxiter=1) + res2 = differentiate(f, x, initial_step=0.05, maxiter=1) + assert abs(res2.df - ref) < abs(res1.df - ref) + + res1 = differentiate(f, x, step_factor=2, maxiter=1) + res2 = differentiate(f, x, step_factor=20, maxiter=1) + assert abs(res2.df - ref) < abs(res1.df - ref) + + # `step_factor` can be less than 1: `initial_step` is the minimum step + kwargs = dict(order=4, maxiter=1, step_direction=0) + res = differentiate(f, x, initial_step=0.5, step_factor=0.5, **kwargs) + ref = differentiate(f, x, initial_step=1, step_factor=2, **kwargs) + assert_allclose(res.df, ref.df, rtol=5e-15) + + # This is a similar test for one-sided difference + kwargs = dict(order=2, maxiter=1, step_direction=1) + res = differentiate(f, x, initial_step=1, step_factor=2, **kwargs) + ref = differentiate(f, x, initial_step=1/np.sqrt(2), step_factor=0.5, + **kwargs) + assert_allclose(res.df, ref.df, rtol=5e-15) + + kwargs['step_direction'] = -1 + res = differentiate(f, x, initial_step=1, step_factor=2, **kwargs) + ref = differentiate(f, x, initial_step=1/np.sqrt(2), step_factor=0.5, + **kwargs) + assert_allclose(res.df, ref.df, rtol=5e-15) + + def test_step_direction(self): + # test that `step_direction` works as expected + def f(x): + y = np.exp(x) + y[(x < 0) + (x > 2)] = np.nan + return y + + x = np.linspace(0, 2, 10) + step_direction = np.zeros_like(x) + step_direction[x < 0.6], step_direction[x > 1.4] = 1, -1 + res = differentiate(f, x, step_direction=step_direction) + assert_allclose(res.df, np.exp(x)) + assert np.all(res.success) + + def test_vectorized_step_direction_args(self): + # test that `step_direction` and `args` are vectorized properly + def f(x, p): + return x ** p + + def df(x, p): + return p * x ** (p 
- 1) + + x = np.array([1, 2, 3, 4]).reshape(-1, 1, 1) + hdir = np.array([-1, 0, 1]).reshape(1, -1, 1) + p = np.array([2, 3]).reshape(1, 1, -1) + res = differentiate(f, x, step_direction=hdir, args=(p,)) + ref = np.broadcast_to(df(x, p), res.df.shape) + assert_allclose(res.df, ref) + + def test_maxiter_callback(self): + # Test behavior of `maxiter` parameter and `callback` interface + x = 0.612814 + dist = stats.norm() + maxiter = 3 + + def f(x): + res = dist.cdf(x) + return res + + default_order = 8 + res = differentiate(f, x, maxiter=maxiter, rtol=1e-15) + assert not np.any(res.success) + assert np.all(res.nfev == default_order + 1 + (maxiter - 1)*2) + assert np.all(res.nit == maxiter) + + def callback(res): + callback.iter += 1 + callback.res = res + assert hasattr(res, 'x') + assert res.df not in callback.dfs + callback.dfs.add(res.df) + assert res.status == eim._EINPROGRESS + if callback.iter == maxiter: + raise StopIteration + callback.iter = -1 # callback called once before first iteration + callback.res = None + callback.dfs = set() + + res2 = differentiate(f, x, callback=callback, rtol=1e-15) + # terminating with callback is identical to terminating due to maxiter + # (except for `status`) + for key in res.keys(): + if key == 'status': + assert res[key] == eim._ECONVERR + assert callback.res[key] == eim._EINPROGRESS + assert res2[key] == eim._ECALLBACK + else: + assert res2[key] == callback.res[key] == res[key] + + @pytest.mark.parametrize("hdir", (-1, 0, 1)) + @pytest.mark.parametrize("x", (0.65, [0.65, 0.7])) + @pytest.mark.parametrize("dtype", (np.float16, np.float32, np.float64)) + def test_dtype(self, hdir, x, dtype): + # Test that dtypes are preserved + x = np.asarray(x, dtype=dtype)[()] + + def f(x): + assert x.dtype == dtype + return np.exp(x) + + def callback(res): + assert res.x.dtype == dtype + assert res.df.dtype == dtype + assert res.error.dtype == dtype + + res = differentiate(f, x, order=4, step_direction=hdir, + callback=callback) + assert res.x.dtype == dtype + assert res.df.dtype == dtype + assert res.error.dtype == dtype + eps = np.finfo(dtype).eps + assert_allclose(res.df, np.exp(res.x), rtol=np.sqrt(eps)) + + def test_input_validation(self): + # Test input validation for appropriate error messages + + message = '`func` must be callable.' + with pytest.raises(ValueError, match=message): + differentiate(None, 1) + + message = 'Abscissae and function output must be real numbers.' + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, -4+1j) + + message = "When `preserve_shape=False`, the shape of the array..." + with pytest.raises(ValueError, match=message): + differentiate(lambda x: [1, 2, 3], [-2, -3]) + + message = 'Tolerances and step parameters must be non-negative...' + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, atol=-1) + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, rtol='ekki') + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, initial_step=None) + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, step_factor=object()) + + message = '`maxiter` must be a positive integer.' 
+ with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, maxiter=1.5) + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, maxiter=0) + + message = '`order` must be a positive integer' + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, order=1.5) + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, order=0) + + message = '`preserve_shape` must be True or False.' + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, preserve_shape='herring') + + message = '`callback` must be callable.' + with pytest.raises(ValueError, match=message): + differentiate(lambda x: x, 1, callback='shrubbery') + + def test_special_cases(self): + # Test edge cases and other special cases + + # Test that integers are not passed to `f` + # (otherwise this would overflow) + def f(x): + assert np.issubdtype(x.dtype, np.floating) + return x ** 99 - 1 + + res = differentiate(f, 7, rtol=1e-10) + assert res.success + assert_allclose(res.df, 99*7.**98) + + # Test that success is achieved in the correct number + # of iterations when the function is a polynomial. Ideally, all + # polynomials of order 0-2 would get the exact result with 0 refinement + # iterations, all polynomials of order 3-4 would be differentiated + # exactly after 1 iteration, etc. However, it seems that _differentiate + # needs an extra iteration to detect convergence based on the error + # estimate. + + for n in range(6): + x = 1.5 + def f(x): + return 2*x**n + + ref = 2*n*x**(n-1) + + res = differentiate(f, x, maxiter=1, order=max(1, n)) + assert_allclose(res.df, ref, rtol=1e-15) + assert_equal(res.error, np.nan) + + res = differentiate(f, x, order=max(1, n)) + assert res.success + assert res.nit == 2 + assert_allclose(res.df, ref, rtol=1e-15) + + # Test scalar `args` (not in tuple) + def f(x, c): + return c*x - 1 + + res = differentiate(f, 2, args=3) + assert_allclose(res.df, 3) + + @pytest.mark.xfail + @pytest.mark.parametrize("case", ( # function, evaluation point + (lambda x: (x - 1) ** 3, 1), + (lambda x: np.where(x > 1, (x - 1) ** 5, (x - 1) ** 3), 1) + )) + def test_saddle_gh18811(self, case): + # With default settings, _differentiate will not always converge when + # the true derivative is exactly zero. This tests that specifying a + # (tight) `atol` alleviates the problem. See discussion in gh-18811.
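+ # (A purely relative tolerance can never be satisfied when the true + # derivative is exactly 0, hence the absolute tolerance below.)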
+ atol = 1e-16 + res = differentiate(*case, step_direction=[-1, 0, 1], atol=atol) + assert np.all(res.success) + assert_allclose(res.df, 0, atol=atol) + + +class TestJacobian: + + # Example functions and Jacobians from Wikipedia: + # https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant#Examples + + def f1(z): + x, y = z + return [x ** 2 * y, 5 * x + np.sin(y)] + + def df1(z): + x, y = z + return [[2 * x * y, x ** 2], [np.full_like(x, 5), np.cos(y)]] + + f1.mn = 2, 2 # type: ignore[attr-defined] + f1.ref = df1 # type: ignore[attr-defined] + + def f2(z): + r, phi = z + return [r * np.cos(phi), r * np.sin(phi)] + + def df2(z): + r, phi = z + return [[np.cos(phi), -r * np.sin(phi)], + [np.sin(phi), r * np.cos(phi)]] + + f2.mn = 2, 2 # type: ignore[attr-defined] + f2.ref = df2 # type: ignore[attr-defined] + + def f3(z): + r, phi, th = z + return [r * np.sin(phi) * np.cos(th), r * np.sin(phi) * np.sin(th), + r * np.cos(phi)] + + def df3(z): + r, phi, th = z + return [[np.sin(phi) * np.cos(th), r * np.cos(phi) * np.cos(th), + -r * np.sin(phi) * np.sin(th)], + [np.sin(phi) * np.sin(th), r * np.cos(phi) * np.sin(th), + r * np.sin(phi) * np.cos(th)], + [np.cos(phi), -r * np.sin(phi), np.zeros_like(r)]] + + f3.mn = 3, 3 # type: ignore[attr-defined] + f3.ref = df3 # type: ignore[attr-defined] + + def f4(x): + x1, x2, x3 = x + return [x1, 5 * x3, 4 * x2 ** 2 - 2 * x3, x3 * np.sin(x1)] + + def df4(x): + x1, x2, x3 = x + one = np.ones_like(x1) + return [[one, 0 * one, 0 * one], + [0 * one, 0 * one, 5 * one], + [0 * one, 8 * x2, -2 * one], + [x3 * np.cos(x1), 0 * one, np.sin(x1)]] + + f4.mn = 3, 4 # type: ignore[attr-defined] + f4.ref = df4 # type: ignore[attr-defined] + + def f5(x): + x1, x2, x3 = x + return [5 * x2, 4 * x1 ** 2 - 2 * np.sin(x2 * x3), x2 * x3] + + def df5(x): + x1, x2, x3 = x + one = np.ones_like(x1) + return [[0 * one, 5 * one, 0 * one], + [8 * x1, -2 * x3 * np.cos(x2 * x3), -2 * x2 * np.cos(x2 * x3)], + [0 * one, x3, x2]] + + f5.mn = 3, 3 # type: ignore[attr-defined] + f5.ref = df5 # type: ignore[attr-defined] + + rosen = optimize.rosen + rosen.mn = 5, 1 # type: ignore[attr-defined] + rosen.ref = optimize.rosen_der # type: ignore[attr-defined] + + @pytest.mark.parametrize('size', [(), (6,), (2, 3)]) + @pytest.mark.parametrize('func', [f1, f2, f3, f4, f5, rosen]) + def test_examples(self, size, func): + rng = np.random.default_rng(458912319542) + m, n = func.mn + x = rng.random(size=(m,) + size) + res = jacobian(func, x).df + ref = func.ref(x) + np.testing.assert_allclose(res, ref, atol=1e-10) + + def test_iv(self): + # Test input validation + message = "Argument `x` must be at least 1-D." + with pytest.raises(ValueError, match=message): + jacobian(np.sin, 1, atol=-1) + + # Confirm that other parameters are being passed to `_derivative`, + # which raises an appropriate error message. + x = np.ones(3) + func = optimize.rosen + message = 'Tolerances and step parameters must be non-negative scalars.' + with pytest.raises(ValueError, match=message): + jacobian(func, x, atol=-1) + with pytest.raises(ValueError, match=message): + jacobian(func, x, rtol=-1) + with pytest.raises(ValueError, match=message): + jacobian(func, x, initial_step=-1) + with pytest.raises(ValueError, match=message): + jacobian(func, x, step_factor=-1) + + message = '`order` must be a positive integer.' + with pytest.raises(ValueError, match=message): + jacobian(func, x, order=-1) + + message = '`maxiter` must be a positive integer.' 
+ with pytest.raises(ValueError, match=message): + jacobian(func, x, maxiter=-1) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_direct.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_direct.py new file mode 100644 index 0000000000000000000000000000000000000000..f131527deac44edc095be9d4d96d57fa49dadd1b --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_direct.py @@ -0,0 +1,318 @@ +""" +Unit test for DIRECT optimization algorithm. +""" +from numpy.testing import (assert_allclose, + assert_array_less) +import pytest +import numpy as np +from scipy.optimize import direct, Bounds + + +class TestDIRECT: + + def setup_method(self): + self.fun_calls = 0 + self.bounds_sphere = 4*[(-2, 3)] + self.optimum_sphere_pos = np.zeros((4, )) + self.optimum_sphere = 0.0 + self.bounds_stylinski_tang = Bounds([-4., -4.], [4., 4.]) + self.maxiter = 1000 + + # test functions + def sphere(self, x): + self.fun_calls += 1 + return np.square(x).sum() + + def inv(self, x): + if np.sum(x) == 0: + raise ZeroDivisionError() + return 1/np.sum(x) + + def nan_fun(self, x): + return np.nan + + def inf_fun(self, x): + return np.inf + + def styblinski_tang(self, pos): + x, y = pos + return 0.5 * (x**4 - 16 * x**2 + 5 * x + y**4 - 16 * y**2 + 5 * y) + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_direct(self, locally_biased): + res = direct(self.sphere, self.bounds_sphere, + locally_biased=locally_biased) + + # test accuracy + assert_allclose(res.x, self.optimum_sphere_pos, + rtol=1e-3, atol=1e-3) + assert_allclose(res.fun, self.optimum_sphere, atol=1e-5, rtol=1e-5) + + # test that result lies within bounds + _bounds = np.asarray(self.bounds_sphere) + assert_array_less(_bounds[:, 0], res.x) + assert_array_less(res.x, _bounds[:, 1]) + + # test number of function evaluations. 
Original DIRECT overshoots by + # up to 500 evaluations in last iteration + assert res.nfev <= 1000 * (len(self.bounds_sphere) + 1) + # test that number of function evaluations is correct + assert res.nfev == self.fun_calls + + # test that number of iterations is below supplied maximum + assert res.nit <= self.maxiter + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_direct_callback(self, locally_biased): + # test that callback does not change the result + res = direct(self.sphere, self.bounds_sphere, + locally_biased=locally_biased) + + def callback(x): + x = 2*x + dummy = np.square(x) + print("DIRECT minimization algorithm callback test") + return dummy + + res_callback = direct(self.sphere, self.bounds_sphere, + locally_biased=locally_biased, + callback=callback) + + assert_allclose(res.x, res_callback.x) + + assert res.nit == res_callback.nit + assert res.nfev == res_callback.nfev + assert res.status == res_callback.status + assert res.success == res_callback.success + assert res.fun == res_callback.fun + assert_allclose(res.x, res_callback.x) + assert res.message == res_callback.message + + # test accuracy + assert_allclose(res_callback.x, self.optimum_sphere_pos, + rtol=1e-3, atol=1e-3) + assert_allclose(res_callback.fun, self.optimum_sphere, + atol=1e-5, rtol=1e-5) + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_exception(self, locally_biased): + bounds = 4*[(-10, 10)] + with pytest.raises(ZeroDivisionError): + direct(self.inv, bounds=bounds, + locally_biased=locally_biased) + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_nan(self, locally_biased): + bounds = 4*[(-10, 10)] + direct(self.nan_fun, bounds=bounds, + locally_biased=locally_biased) + + @pytest.mark.parametrize("len_tol", [1e-3, 1e-4]) + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_len_tol(self, len_tol, locally_biased): + bounds = 4*[(-10., 10.)] + res = direct(self.sphere, bounds=bounds, len_tol=len_tol, + vol_tol=1e-30, locally_biased=locally_biased) + assert res.status == 5 + assert res.success + assert_allclose(res.x, np.zeros((4, ))) + message = ("The side length measure of the hyperrectangle containing " + "the lowest function value found is below " + f"len_tol={len_tol}") + assert res.message == message + + @pytest.mark.parametrize("vol_tol", [1e-6, 1e-8]) + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_vol_tol(self, vol_tol, locally_biased): + bounds = 4*[(-10., 10.)] + res = direct(self.sphere, bounds=bounds, vol_tol=vol_tol, + len_tol=0., locally_biased=locally_biased) + assert res.status == 4 + assert res.success + assert_allclose(res.x, np.zeros((4, ))) + message = ("The volume of the hyperrectangle containing the lowest " + f"function value found is below vol_tol={vol_tol}") + assert res.message == message + + @pytest.mark.parametrize("f_min_rtol", [1e-3, 1e-5, 1e-7]) + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_f_min(self, f_min_rtol, locally_biased): + # test that desired function value is reached within + # relative tolerance of f_min_rtol + f_min = 1. + bounds = 4*[(-2., 10.)] + res = direct(self.sphere, bounds=bounds, f_min=f_min, + f_min_rtol=f_min_rtol, + locally_biased=locally_biased) + assert res.status == 3 + assert res.success + assert res.fun < f_min * (1. 
+ f_min_rtol) + message = ("The best function value found is within a relative " + f"error={f_min_rtol} of the (known) global optimum f_min") + assert res.message == message + + def circle_with_args(self, x, a, b): + return np.square(x[0] - a) + np.square(x[1] - b).sum() + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_f_circle_with_args(self, locally_biased): + bounds = 2*[(-2.0, 2.0)] + + res = direct(self.circle_with_args, bounds, args=(1, 1), maxfun=1250, + locally_biased=locally_biased) + assert_allclose(res.x, np.array([1., 1.]), rtol=1e-5) + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_failure_maxfun(self, locally_biased): + # test that if optimization runs for the maximal number of + # evaluations, success = False is returned + + maxfun = 100 + result = direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxfun=maxfun, locally_biased=locally_biased) + assert result.success is False + assert result.status == 1 + assert result.nfev >= maxfun + message = ("Number of function evaluations done is " + f"larger than maxfun={maxfun}") + assert result.message == message + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_failure_maxiter(self, locally_biased): + # test that if optimization runs for the maximal number of + # iterations, success = False is returned + + maxiter = 10 + result = direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxiter=maxiter, locally_biased=locally_biased) + assert result.success is False + assert result.status == 2 + assert result.nit >= maxiter + message = f"Number of iterations is larger than maxiter={maxiter}" + assert result.message == message + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_bounds_variants(self, locally_biased): + # test that new and old bounds yield same result + + lb = [-6., 1., -5.] + ub = [-1., 3., 5.] 
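+ # the constrained minimum of the sphere is the feasible point closest + # to the origin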
+ x_opt = np.array([-1., 1., 0.]) + bounds_old = list(zip(lb, ub)) + bounds_new = Bounds(lb, ub) + + res_old_bounds = direct(self.sphere, bounds_old, + locally_biased=locally_biased) + res_new_bounds = direct(self.sphere, bounds_new, + locally_biased=locally_biased) + + assert res_new_bounds.nfev == res_old_bounds.nfev + assert res_new_bounds.message == res_old_bounds.message + assert res_new_bounds.success == res_old_bounds.success + assert res_new_bounds.nit == res_old_bounds.nit + assert_allclose(res_new_bounds.x, res_old_bounds.x) + assert_allclose(res_new_bounds.x, x_opt, rtol=1e-2) + + @pytest.mark.parametrize("locally_biased", [True, False]) + @pytest.mark.parametrize("eps", [1e-5, 1e-4, 1e-3]) + def test_epsilon(self, eps, locally_biased): + result = direct(self.styblinski_tang, self.bounds_stylinski_tang, + eps=eps, vol_tol=1e-6, + locally_biased=locally_biased) + assert result.status == 4 + assert result.success + + @pytest.mark.xslow + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_no_segmentation_fault(self, locally_biased): + # test that an excessive number of function evaluations + # does not result in segmentation fault + bounds = [(-5., 20.)] * 100 + result = direct(self.sphere, bounds, maxfun=10000000, + maxiter=1000000, locally_biased=locally_biased) + assert result is not None + + @pytest.mark.parametrize("locally_biased", [True, False]) + def test_inf_fun(self, locally_biased): + # test that an objective value of infinity does not crash DIRECT + bounds = [(-5., 5.)] * 2 + result = direct(self.inf_fun, bounds, + locally_biased=locally_biased) + assert result is not None + + @pytest.mark.parametrize("len_tol", [-1, 2]) + def test_len_tol_validation(self, len_tol): + error_msg = "len_tol must be between 0 and 1." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + len_tol=len_tol) + + @pytest.mark.parametrize("vol_tol", [-1, 2]) + def test_vol_tol_validation(self, vol_tol): + error_msg = "vol_tol must be between 0 and 1." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + vol_tol=vol_tol) + + @pytest.mark.parametrize("f_min_rtol", [-1, 2]) + def test_fmin_rtol_validation(self, f_min_rtol): + error_msg = "f_min_rtol must be between 0 and 1." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + f_min_rtol=f_min_rtol, f_min=0.) + + @pytest.mark.parametrize("maxfun", [1.5, "string", (1, 2)]) + def test_maxfun_wrong_type(self, maxfun): + error_msg = "maxfun must be of type int." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxfun=maxfun) + + @pytest.mark.parametrize("maxiter", [1.5, "string", (1, 2)]) + def test_maxiter_wrong_type(self, maxiter): + error_msg = "maxiter must be of type int." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxiter=maxiter) + + def test_negative_maxiter(self): + error_msg = "maxiter must be > 0." + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxiter=-1) + + def test_negative_maxfun(self): + error_msg = "maxfun must be > 0." 
+ with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + maxfun=-1) + + @pytest.mark.parametrize("bounds", ["bounds", 2., 0]) + def test_invalid_bounds_type(self, bounds): + error_msg = ("bounds must be a sequence or " + "instance of Bounds class") + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, bounds) + + @pytest.mark.parametrize("bounds", + [Bounds([-1., -1], [-2, 1]), + Bounds([-np.nan, -1], [-2, np.nan]), + ] + ) + def test_incorrect_bounds(self, bounds): + error_msg = 'Bounds are not consistent min < max' + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, bounds) + + def test_inf_bounds(self): + error_msg = 'Bounds must not be inf.' + bounds = Bounds([-np.inf, -1], [-2, np.inf]) + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, bounds) + + @pytest.mark.parametrize("locally_biased", ["bias", [0, 0], 2.]) + def test_locally_biased_validation(self, locally_biased): + error_msg = 'locally_biased must be True or False.' + with pytest.raises(ValueError, match=error_msg): + direct(self.styblinski_tang, self.bounds_stylinski_tang, + locally_biased=locally_biased) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_extending.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_extending.py new file mode 100644 index 0000000000000000000000000000000000000000..80e25f28891c2b29f3d6963b3335351957e4bc78 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_extending.py @@ -0,0 +1,24 @@ +import os +import platform + +import pytest + +from scipy._lib._testutils import IS_EDITABLE, _test_cython_extension, cython + + +@pytest.mark.fail_slow(20) +# essential per https://github.com/scipy/scipy/pull/20487#discussion_r1567057247 +@pytest.mark.skipif(IS_EDITABLE, + reason='Editable install cannot find .pxd headers.') +@pytest.mark.skipif(platform.machine() in ["wasm32", "wasm64"], + reason="Can't start subprocess") +@pytest.mark.skipif(cython is None, reason="requires cython") +def test_cython(tmp_path): + srcdir = os.path.dirname(os.path.dirname(__file__)) + extensions, extensions_cpp = _test_cython_extension(tmp_path, srcdir) + # actually test the cython c-extensions + # From docstring for scipy.optimize.cython_optimize module + x = extensions.brentq_example() + assert x == 0.6999942848231314 + x = extensions_cpp.brentq_example() + assert x == 0.6999942848231314 diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_hessian_update_strategy.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_hessian_update_strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9d7a059b471f765af5be8de8108a1811fe4482 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_hessian_update_strategy.py @@ -0,0 +1,292 @@ +import re +from copy import deepcopy + +import numpy as np +import pytest +from numpy.linalg import norm +from numpy.testing import (TestCase, assert_array_almost_equal, + assert_array_equal, assert_array_less) +from scipy.optimize import (BFGS, SR1) + + +class Rosenbrock: + """Rosenbrock function. 
+ + The following optimization problem: + minimize sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0) + """ + + def __init__(self, n=2, random_state=0): + rng = np.random.RandomState(random_state) + self.x0 = rng.uniform(-1, 1, n) + self.x_opt = np.ones(n) + + def fun(self, x): + x = np.asarray(x) + r = np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0, + axis=0) + return r + + def grad(self, x): + x = np.asarray(x) + xm = x[1:-1] + xm_m1 = x[:-2] + xm_p1 = x[2:] + der = np.zeros_like(x) + der[1:-1] = (200 * (xm - xm_m1**2) - + 400 * (xm_p1 - xm**2) * xm - 2 * (1 - xm)) + der[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0]) + der[-1] = 200 * (x[-1] - x[-2]**2) + return der + + def hess(self, x): + x = np.atleast_1d(x) + H = np.diag(-400 * x[:-1], 1) - np.diag(400 * x[:-1], -1) + diagonal = np.zeros(len(x), dtype=x.dtype) + diagonal[0] = 1200 * x[0]**2 - 400 * x[1] + 2 + diagonal[-1] = 200 + diagonal[1:-1] = 202 + 1200 * x[1:-1]**2 - 400 * x[2:] + H = H + np.diag(diagonal) + return H + + +class TestHessianUpdateStrategy(TestCase): + + + def test_hessian_initialization(self): + + ndims = 5 + symmetric_matrix = np.array([[43, 24, 33, 34, 49], + [24, 36, 44, 15, 44], + [33, 44, 37, 1, 30], + [34, 15, 1, 5, 46], + [49, 44, 30, 46, 22]]) + init_scales = ( + ('auto', np.eye(ndims)), + (2, np.eye(ndims) * 2), + (np.arange(1, ndims + 1) * np.eye(ndims), + np.arange(1, ndims + 1) * np.eye(ndims)), + (symmetric_matrix, symmetric_matrix),) + for approx_type in ['hess', 'inv_hess']: + for init_scale, true_matrix in init_scales: + # large min_{denominator,curvature} makes them skip an update, + # so we can have our initial matrix + quasi_newton = (BFGS(init_scale=init_scale, + min_curvature=1e50, + exception_strategy='skip_update'), + SR1(init_scale=init_scale, + min_denominator=1e50)) + + for qn in quasi_newton: + qn.initialize(ndims, approx_type) + B = qn.get_matrix() + + assert_array_equal(B, np.eye(ndims)) + # don't test the auto init scale + if isinstance(init_scale, str) and init_scale == 'auto': + continue + + qn.update(np.ones(ndims) * 1e-5, np.arange(ndims) + 0.2) + B = qn.get_matrix() + assert_array_equal(B, true_matrix) + + # For this list of points, it is known + # that no exceptions occur during the + # Hessian update. Hence no update is + # skipped or damped. + + + def test_initialize_catch_illegal(self): + ndims = 3 + # no complex allowed + inits_msg_errtype = ((complex(3.14), + re.escape("float() argument must be a " + "string or a real number, " + "not 'complex'"), + TypeError), + + (np.array([3.2, 2.3, 1.2]).astype(np.complex128), + "init_scale contains complex elements, " + "must be real.", + TypeError), + + (np.array([[43, 24, 33], + [24, 36, 44, ], + [33, 44, 37, ]]).astype(np.complex128), + "init_scale contains complex elements, " + "must be real.", + TypeError), + + # not square + (np.array([[43, 55, 66]]), + re.escape( + "If init_scale is an array, it must have the " + "dimensions of the hess/inv_hess: (3, 3)."
+ " Got (1, 3)."), + ValueError), + + # not symmetric + (np.array([[43, 24, 33], + [24.1, 36, 44, ], + [33, 44, 37, ]]), + re.escape("If init_scale is an array, it must be" + " symmetric (passing scipy.linalg.issymmetric)" + " to be an approximation of a hess/inv_hess."), + ValueError), + ) + for approx_type in ['hess', 'inv_hess']: + for init_scale, message, errortype in inits_msg_errtype: + # large min_{denominator,curvatur} makes it skip an update, + # so we can retrieve our initial matrix + quasi_newton = (BFGS(init_scale=init_scale), + SR1(init_scale=init_scale)) + + for qn in quasi_newton: + qn.initialize(ndims, approx_type) + with pytest.raises(errortype, match=message): + qn.update(np.ones(ndims), np.arange(ndims)) + + def test_rosenbrock_with_no_exception(self): + # Define auxiliary problem + prob = Rosenbrock(n=5) + # Define iteration points + x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040], + [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286], + [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606], + [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750], + [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699], + [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610], + [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184], + [0.2354930, 0.0443711, 0.0173959, 0.0041872, 0.00794563], + [0.4168118, 0.1433867, 0.0111714, 0.0126265, -0.00658537], + [0.4681972, 0.2153273, 0.0225249, 0.0152704, -0.00463809], + [0.6023068, 0.3346815, 0.0731108, 0.0186618, -0.00371541], + [0.6415743, 0.3985468, 0.1324422, 0.0214160, -0.00062401], + [0.7503690, 0.5447616, 0.2804541, 0.0539851, 0.00242230], + [0.7452626, 0.5644594, 0.3324679, 0.0865153, 0.00454960], + [0.8059782, 0.6586838, 0.4229577, 0.1452990, 0.00976702], + [0.8549542, 0.7226562, 0.4991309, 0.2420093, 0.02772661], + [0.8571332, 0.7285741, 0.5279076, 0.2824549, 0.06030276], + [0.8835633, 0.7727077, 0.5957984, 0.3411303, 0.09652185], + [0.9071558, 0.8299587, 0.6771400, 0.4402896, 0.17469338], + [0.9190793, 0.8486480, 0.7163332, 0.5083780, 0.26107691], + [0.9371223, 0.8762177, 0.7653702, 0.5773109, 0.32181041], + [0.9554613, 0.9119893, 0.8282687, 0.6776178, 0.43162744], + [0.9545744, 0.9099264, 0.8270244, 0.6822220, 0.45237623], + [0.9688112, 0.9351710, 0.8730961, 0.7546601, 0.56622448], + [0.9743227, 0.9491953, 0.9005150, 0.8086497, 0.64505437], + [0.9807345, 0.9638853, 0.9283012, 0.8631675, 0.73812581], + [0.9886746, 0.9777760, 0.9558950, 0.9123417, 0.82726553], + [0.9899096, 0.9803828, 0.9615592, 0.9255600, 0.85822149], + [0.9969510, 0.9935441, 0.9864657, 0.9726775, 0.94358663], + [0.9979533, 0.9960274, 0.9921724, 0.9837415, 0.96626288], + [0.9995981, 0.9989171, 0.9974178, 0.9949954, 0.99023356], + [1.0002640, 1.0005088, 1.0010594, 1.0021161, 1.00386912], + [0.9998903, 0.9998459, 0.9997795, 0.9995484, 0.99916305], + [1.0000008, 0.9999905, 0.9999481, 0.9998903, 0.99978047], + [1.0000004, 0.9999983, 1.0000001, 1.0000031, 1.00000297], + [0.9999995, 1.0000003, 1.0000005, 1.0000001, 1.00000032], + [0.9999999, 0.9999997, 0.9999994, 0.9999989, 0.99999786], + [0.9999999, 0.9999999, 0.9999999, 0.9999999, 0.99999991]] + # Get iteration points + grad_list = [prob.grad(x) for x in x_list] + delta_x = [np.array(x_list[i+1])-np.array(x_list[i]) + for i in range(len(x_list)-1)] + delta_grad = [grad_list[i+1]-grad_list[i] + for i in range(len(grad_list)-1)] + # Check curvature condition + for s, y in zip(delta_x, delta_grad): + if np.dot(s, y) <= 0: + raise ArithmeticError() + # Define QuasiNewton 
update + for quasi_newton in (BFGS(init_scale=1, min_curvature=1e-4), + SR1(init_scale=1)): + hess = deepcopy(quasi_newton) + inv_hess = deepcopy(quasi_newton) + hess.initialize(len(x_list[0]), 'hess') + inv_hess.initialize(len(x_list[0]), 'inv_hess') + # Compare the hessian and its inverse + for s, y in zip(delta_x, delta_grad): + hess.update(s, y) + inv_hess.update(s, y) + B = hess.get_matrix() + H = inv_hess.get_matrix() + assert_array_almost_equal(np.linalg.inv(B), H, decimal=10) + B_true = prob.hess(x_list[len(delta_x)]) + assert_array_less(norm(B - B_true)/norm(B_true), 0.1) + + def test_SR1_skip_update(self): + # Define auxiliary problem + prob = Rosenbrock(n=5) + # Define iteration points + x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040], + [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286], + [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606], + [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750], + [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699], + [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610], + [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184], + [0.2354930, 0.0443711, 0.0173959, 0.0041872, 0.00794563], + [0.4168118, 0.1433867, 0.0111714, 0.0126265, -0.00658537], + [0.4681972, 0.2153273, 0.0225249, 0.0152704, -0.00463809], + [0.6023068, 0.3346815, 0.0731108, 0.0186618, -0.00371541], + [0.6415743, 0.3985468, 0.1324422, 0.0214160, -0.00062401], + [0.7503690, 0.5447616, 0.2804541, 0.0539851, 0.00242230], + [0.7452626, 0.5644594, 0.3324679, 0.0865153, 0.00454960], + [0.8059782, 0.6586838, 0.4229577, 0.1452990, 0.00976702], + [0.8549542, 0.7226562, 0.4991309, 0.2420093, 0.02772661], + [0.8571332, 0.7285741, 0.5279076, 0.2824549, 0.06030276], + [0.8835633, 0.7727077, 0.5957984, 0.3411303, 0.09652185], + [0.9071558, 0.8299587, 0.6771400, 0.4402896, 0.17469338]] + # Get iteration points + grad_list = [prob.grad(x) for x in x_list] + delta_x = [np.array(x_list[i+1])-np.array(x_list[i]) + for i in range(len(x_list)-1)] + delta_grad = [grad_list[i+1]-grad_list[i] + for i in range(len(grad_list)-1)] + hess = SR1(init_scale=1, min_denominator=1e-2) + hess.initialize(len(x_list[0]), 'hess') + # Compare the Hessian and its inverse + for i in range(len(delta_x)-1): + s = delta_x[i] + y = delta_grad[i] + hess.update(s, y) + # Test skip update + B = np.copy(hess.get_matrix()) + s = delta_x[17] + y = delta_grad[17] + hess.update(s, y) + B_updated = np.copy(hess.get_matrix()) + assert_array_equal(B, B_updated) + + def test_BFGS_skip_update(self): + # Define auxiliary problem + prob = Rosenbrock(n=5) + # Define iteration points + x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040], + [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286], + [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606], + [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750], + [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699], + [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610], + [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184]] + # Get iteration points + grad_list = [prob.grad(x) for x in x_list] + delta_x = [np.array(x_list[i+1])-np.array(x_list[i]) + for i in range(len(x_list)-1)] + delta_grad = [grad_list[i+1]-grad_list[i] + for i in range(len(grad_list)-1)] + hess = BFGS(init_scale=1, min_curvature=10) + hess.initialize(len(x_list[0]), 'hess') + # Compare the Hessian and its inverse + for i in range(len(delta_x)-1): + s = delta_x[i] + y = delta_grad[i] + hess.update(s, y) + # Test skip 
update + B = np.copy(hess.get_matrix()) + s = delta_x[5] + y = delta_grad[5] + hess.update(s, y) + B_updated = np.copy(hess.get_matrix()) + assert_array_equal(B, B_updated) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_isotonic_regression.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_isotonic_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..b49c56db5b4470c1e4e0f787df52c80eb055c120 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_isotonic_regression.py @@ -0,0 +1,167 @@ +import numpy as np +from numpy.testing import assert_allclose, assert_equal +import pytest + +from scipy.optimize._pava_pybind import pava +from scipy.optimize import isotonic_regression + + +class TestIsotonicRegression: + @pytest.mark.parametrize( + ("y", "w", "msg"), + [ + ([[0, 1]], None, + "array has incorrect number of dimensions: 2; expected 1"), + ([0, 1], [[1, 2]], + "Input arrays y and w must have one dimension of equal length"), + ([0, 1], [1], + "Input arrays y and w must have one dimension of equal length"), + (1, [1, 2], + "Input arrays y and w must have one dimension of equal length"), + ([1, 2], 1, + "Input arrays y and w must have one dimension of equal length"), + ([0, 1], [0, 1], + "Weights w must be strictly positive"), + ] + ) + def test_raise_error(self, y, w, msg): + with pytest.raises(ValueError, match=msg): + isotonic_regression(y=y, weights=w) + + def test_simple_pava(self): + # Test case of Busing 2020 + # https://doi.org/10.18637/jss.v102.c01 + y = np.array([8, 4, 8, 2, 2, 0, 8], dtype=np.float64) + w = np.ones_like(y) + r = np.full(shape=y.shape[0] + 1, fill_value=-1, dtype=np.intp) + pava(y, w, r) + assert_allclose(y, [4, 4, 4, 4, 4, 4, 8]) + # Only first 2 elements of w are changed. + assert_allclose(w, [6, 1, 1, 1, 1, 1, 1]) + # Only first 3 elements of r are changed. 
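+ # (r collects the block boundary indices; the remaining entries keep + # their -1 fill value)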
+ assert_allclose(r, [0, 6, 7, -1, -1, -1, -1, -1]) + + @pytest.mark.parametrize("y_dtype", [np.float64, np.float32, np.int64, np.int32]) + @pytest.mark.parametrize("w_dtype", [np.float64, np.float32, np.int64, np.int32]) + @pytest.mark.parametrize("w", [None, "ones"]) + def test_simple_isotonic_regression(self, w, w_dtype, y_dtype): + # Test case of Busing 2020 + # https://doi.org/10.18637/jss.v102.c01 + y = np.array([8, 4, 8, 2, 2, 0, 8], dtype=y_dtype) + if w is not None: + w = np.ones_like(y, dtype=w_dtype) + res = isotonic_regression(y, weights=w) + assert res.x.dtype == np.float64 + assert res.weights.dtype == np.float64 + assert_allclose(res.x, [4, 4, 4, 4, 4, 4, 8]) + assert_allclose(res.weights, [6, 1]) + assert_allclose(res.blocks, [0, 6, 7]) + # Assert that y was not overwritten + assert_equal(y, np.array([8, 4, 8, 2, 2, 0, 8], dtype=np.float64)) + + @pytest.mark.parametrize("increasing", [True, False]) + def test_linspace(self, increasing): + n = 10 + y = np.linspace(0, 1, n) if increasing else np.linspace(1, 0, n) + res = isotonic_regression(y, increasing=increasing) + assert_allclose(res.x, y) + assert_allclose(res.blocks, np.arange(n + 1)) + + def test_weights(self): + w = np.array([1, 2, 5, 0.5, 0.5, 0.5, 1, 3]) + y = np.array([3, 2, 1, 10, 9, 8, 20, 10]) + res = isotonic_regression(y, weights=w) + assert_allclose(res.x, [12/8, 12/8, 12/8, 9, 9, 9, 50/4, 50/4]) + assert_allclose(res.weights, [8, 1.5, 4]) + assert_allclose(res.blocks, [0, 3, 6, 8]) + + # weights are like repeated observations, we repeat the 3rd element 5 + # times. + w2 = np.array([1, 2, 1, 1, 1, 1, 1, 0.5, 0.5, 0.5, 1, 3]) + y2 = np.array([3, 2, 1, 1, 1, 1, 1, 10, 9, 8, 20, 10]) + res2 = isotonic_regression(y2, weights=w2) + assert_allclose(np.diff(res2.x[0:7]), 0) + assert_allclose(res2.x[4:], res.x) + assert_allclose(res2.weights, res.weights) + assert_allclose(res2.blocks[1:] - 4, res.blocks[1:]) + + def test_against_R_monotone(self): + y = [0, 6, 8, 3, 5, 2, 1, 7, 9, 4] + res = isotonic_regression(y) + # R code + # library(monotone) + # options(digits=8) + # monotone(c(0, 6, 8, 3, 5, 2, 1, 7, 9, 4)) + x_R = [ + 0, 4.1666667, 4.1666667, 4.1666667, 4.1666667, 4.1666667, + 4.1666667, 6.6666667, 6.6666667, 6.6666667, + ] + assert_allclose(res.x, x_R) + assert_equal(res.blocks, [0, 1, 7, 10]) + + n = 100 + y = np.linspace(0, 1, num=n, endpoint=False) + y = 5 * y + np.sin(10 * y) + res = isotonic_regression(y) + # R code + # library(monotone) + # n <- 100 + # y <- 5 * ((1:n)-1)/n + sin(10 * ((1:n)-1)/n) + # options(digits=8) + # monotone(y) + x_R = [ + 0.00000000, 0.14983342, 0.29866933, 0.44552021, 0.58941834, 0.72942554, + 0.86464247, 0.99421769, 1.11735609, 1.23332691, 1.34147098, 1.44120736, + 1.53203909, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, 1.57081100, + 1.57081100, 1.57081100, 1.57081100, 1.62418532, 1.71654534, 1.81773256, + 1.92723551, 2.04445967, 2.16873336, 2.29931446, 2.43539782, 2.57612334, + 2.72058450, 2.86783750, 3.01691060, 3.16681390, 3.31654920, 3.46511999, + 3.61154136, 3.75484992, 3.89411335, 4.02843976, 4.15698660, 4.27896904, + 4.39366786, 4.50043662, 4.59870810, 4.68799998, 4.76791967, 4.83816823, + 4.86564130, 
4.86564130, 4.86564130, 4.86564130, 4.86564130, 4.86564130, + 4.86564130, 4.86564130, 4.86564130, 4.86564130, 4.86564130, 4.86564130, + 4.86564130, 4.86564130, 4.86564130, 4.86564130, 4.86564130, 4.86564130, + 4.86564130, 4.86564130, 4.86564130, 4.86564130, + ] + assert_allclose(res.x, x_R) + + # Test increasing + assert np.all(np.diff(res.x) >= 0) + + # Test balance property: sum(y) == sum(x) + assert_allclose(np.sum(res.x), np.sum(y)) + + # Reverse order + res_inv = isotonic_regression(-y, increasing=False) + assert_allclose(-res_inv.x, res.x) + assert_equal(res_inv.blocks, res.blocks) + + def test_readonly(self): + x = np.arange(3, dtype=float) + w = np.ones(3, dtype=float) + + x.flags.writeable = False + w.flags.writeable = False + + res = isotonic_regression(x, weights=w) + assert np.all(np.isfinite(res.x)) + assert np.all(np.isfinite(res.weights)) + assert np.all(np.isfinite(res.blocks)) + + def test_non_contiguous_arrays(self): + x = np.arange(10, dtype=float)[::3] + w = np.ones(10, dtype=float)[::3] + assert not x.flags.c_contiguous + assert not x.flags.f_contiguous + assert not w.flags.c_contiguous + assert not w.flags.f_contiguous + + res = isotonic_regression(x, weights=w) + assert np.all(np.isfinite(res.x)) + assert np.all(np.isfinite(res.weights)) + assert np.all(np.isfinite(res.blocks)) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_hessinv.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_hessinv.py new file mode 100644 index 0000000000000000000000000000000000000000..8e4452cd61c5400c13f4f239055352bae754ad7e --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_hessinv.py @@ -0,0 +1,43 @@ +import numpy as np +from numpy.testing import assert_allclose +import scipy.linalg +from scipy.optimize import minimize + + +def test_1(): + def f(x): + return x**4, 4*x**3 + + for gtol in [1e-8, 1e-12, 1e-20]: + for maxcor in range(20, 35): + result = minimize(fun=f, jac=True, method='L-BFGS-B', x0=20, + options={'gtol': gtol, 'maxcor': maxcor}) + + H1 = result.hess_inv(np.array([1])).reshape(1,1) + H2 = result.hess_inv.todense() + + assert_allclose(H1, H2) + + +def test_2(): + H0 = [[3, 0], [1, 2]] + + def f(x): + return np.dot(x, np.dot(scipy.linalg.inv(H0), x)) + + result1 = minimize(fun=f, method='L-BFGS-B', x0=[10, 20]) + result2 = minimize(fun=f, method='BFGS', x0=[10, 20]) + + H1 = result1.hess_inv.todense() + + H2 = np.vstack(( + result1.hess_inv(np.array([1, 0])), + result1.hess_inv(np.array([0, 1])))) + + assert_allclose( + result1.hess_inv(np.array([1, 0]).reshape(2,1)).reshape(-1), + result1.hess_inv(np.array([1, 0]))) + assert_allclose(H1, H2) + assert_allclose(H1, result2.hess_inv, rtol=1e-2, atol=0.03) + + diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_setulb.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_setulb.py new file mode 100644 index 0000000000000000000000000000000000000000..5b2a75684a27f31c07d5b8b20bd757385554edef --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lbfgsb_setulb.py @@ -0,0 +1,128 @@ +import numpy as np +from scipy.optimize import _lbfgsb, minimize + + +def objfun(x): + """simplified objective func to test lbfgsb bound 
violation""" + x0 = [0.8750000000000278, + 0.7500000000000153, + 0.9499999999999722, + 0.8214285714285992, + 0.6363636363636085] + x1 = [1.0, 0.0, 1.0, 0.0, 0.0] + x2 = [1.0, + 0.0, + 0.9889733043149325, + 0.0, + 0.026353554421041155] + x3 = [1.0, + 0.0, + 0.9889917442915558, + 0.0, + 0.020341986743231205] + + f0 = 5163.647901211178 + f1 = 5149.8181642072905 + f2 = 5149.379332309634 + f3 = 5149.374490771297 + + g0 = np.array([-0.5934820547965749, + 1.6251549718258351, + -71.99168459202559, + 5.346636965797545, + 37.10732723092604]) + g1 = np.array([-0.43295349282641515, + 1.008607936794592, + 18.223666726602975, + 31.927010036981997, + -19.667512518739386]) + g2 = np.array([-0.4699874455100256, + 0.9466285353668347, + -0.016874360242016825, + 48.44999161133457, + 5.819631620590712]) + g3 = np.array([-0.46970678696829116, + 0.9612719312174818, + 0.006129809488833699, + 48.43557729419473, + 6.005481418498221]) + + if np.allclose(x, x0): + f = f0 + g = g0 + elif np.allclose(x, x1): + f = f1 + g = g1 + elif np.allclose(x, x2): + f = f2 + g = g2 + elif np.allclose(x, x3): + f = f3 + g = g3 + else: + raise ValueError( + 'Simplified objective function not defined ' + 'at requested point') + return (np.copy(f), np.copy(g)) + + +def test_setulb_floatround(): + """test if setulb() violates bounds + + checks for violation due to floating point rounding error + """ + + n = 5 + m = 10 + factr = 1e7 + pgtol = 1e-5 + maxls = 20 + iprint = -1 + nbd = np.full((n,), 2) + low_bnd = np.zeros(n, np.float64) + upper_bnd = np.ones(n, np.float64) + + x0 = np.array( + [0.8750000000000278, + 0.7500000000000153, + 0.9499999999999722, + 0.8214285714285992, + 0.6363636363636085]) + x = np.copy(x0) + + f = np.array(0.0, np.float64) + g = np.zeros(n, np.float64) + + fortran_int = _lbfgsb.types.intvar.dtype + + wa = np.zeros(2*m*n + 5*n + 11*m*m + 8*m, np.float64) + iwa = np.zeros(3*n, fortran_int) + task = np.zeros(1, 'S60') + csave = np.zeros(1, 'S60') + lsave = np.zeros(4, fortran_int) + isave = np.zeros(44, fortran_int) + dsave = np.zeros(29, np.float64) + + task[:] = b'START' + + for n_iter in range(7): # 7 steps required to reproduce error + f, g = objfun(x) + + _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, + pgtol, wa, iwa, task, iprint, csave, lsave, + isave, dsave, maxls) + + assert (x <= upper_bnd).all() and (x >= low_bnd).all(), ( + "_lbfgsb.setulb() stepped to a point outside of the bounds") + + +def test_gh_issue18730(): + # issue 18730 reported that l-bfgs-b did not work with objectives + # returning single precision gradient arrays + def fun_single_precision(x): + x = x.astype(np.float32) + return np.sum(x**2), (2*x) + + res = minimize(fun_single_precision, x0=np.array([1., 1.]), jac=True, + method="l-bfgs-b") + np.testing.assert_allclose(res.fun, 0., atol=1e-15) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_least_squares.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_least_squares.py new file mode 100644 index 0000000000000000000000000000000000000000..68cfc421c987b6cdfcaa6b442bbd7927cf215ac9 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_least_squares.py @@ -0,0 +1,874 @@ +from itertools import product + +import numpy as np +from numpy.linalg import norm +from numpy.testing import (assert_, assert_allclose, + assert_equal, suppress_warnings) +import pytest +from pytest import raises as 
assert_raises +from scipy.sparse import issparse, lil_matrix +from scipy.sparse.linalg import aslinearoperator + +from scipy.optimize import least_squares, Bounds +from scipy.optimize._lsq.least_squares import IMPLEMENTED_LOSSES +from scipy.optimize._lsq.common import EPS, make_strictly_feasible, CL_scaling_vector + + +def fun_trivial(x, a=0): + return (x - a)**2 + 5.0 + + +def jac_trivial(x, a=0.0): + return 2 * (x - a) + + +def fun_2d_trivial(x): + return np.array([x[0], x[1]]) + + +def jac_2d_trivial(x): + return np.identity(2) + + +def fun_rosenbrock(x): + return np.array([10 * (x[1] - x[0]**2), (1 - x[0])]) + + +def jac_rosenbrock(x): + return np.array([ + [-20 * x[0], 10], + [-1, 0] + ]) + + +def jac_rosenbrock_bad_dim(x): + return np.array([ + [-20 * x[0], 10], + [-1, 0], + [0.0, 0.0] + ]) + + +def fun_rosenbrock_cropped(x): + return fun_rosenbrock(x)[0] + + +def jac_rosenbrock_cropped(x): + return jac_rosenbrock(x)[0] + + +# When x is 1-D array, return is 2-D array. +def fun_wrong_dimensions(x): + return np.array([x, x**2, x**3]) + + +def jac_wrong_dimensions(x, a=0.0): + return np.atleast_3d(jac_trivial(x, a=a)) + + +def fun_bvp(x): + n = int(np.sqrt(x.shape[0])) + u = np.zeros((n + 2, n + 2)) + x = x.reshape((n, n)) + u[1:-1, 1:-1] = x + y = u[:-2, 1:-1] + u[2:, 1:-1] + u[1:-1, :-2] + u[1:-1, 2:] - 4 * x + x**3 + return y.ravel() + + +class BroydenTridiagonal: + def __init__(self, n=100, mode='sparse'): + np.random.seed(0) + + self.n = n + + self.x0 = -np.ones(n) + self.lb = np.linspace(-2, -1.5, n) + self.ub = np.linspace(-0.8, 0.0, n) + + self.lb += 0.1 * np.random.randn(n) + self.ub += 0.1 * np.random.randn(n) + + self.x0 += 0.1 * np.random.randn(n) + self.x0 = make_strictly_feasible(self.x0, self.lb, self.ub) + + if mode == 'sparse': + self.sparsity = lil_matrix((n, n), dtype=int) + i = np.arange(n) + self.sparsity[i, i] = 1 + i = np.arange(1, n) + self.sparsity[i, i - 1] = 1 + i = np.arange(n - 1) + self.sparsity[i, i + 1] = 1 + + self.jac = self._jac + elif mode == 'operator': + self.jac = lambda x: aslinearoperator(self._jac(x)) + elif mode == 'dense': + self.sparsity = None + self.jac = lambda x: self._jac(x).toarray() + else: + assert_(False) + + def fun(self, x): + f = (3 - x) * x + 1 + f[1:] -= x[:-1] + f[:-1] -= 2 * x[1:] + return f + + def _jac(self, x): + J = lil_matrix((self.n, self.n)) + i = np.arange(self.n) + J[i, i] = 3 - 2 * x + i = np.arange(1, self.n) + J[i, i - 1] = -1 + i = np.arange(self.n - 1) + J[i, i + 1] = -2 + return J + + +class ExponentialFittingProblem: + """Provide data and function for exponential fitting in the form + y = a + exp(b * x) + noise.""" + + def __init__(self, a, b, noise, n_outliers=1, x_range=(-1, 1), + n_points=11, random_seed=None): + np.random.seed(random_seed) + self.m = n_points + self.n = 2 + + self.p0 = np.zeros(2) + self.x = np.linspace(x_range[0], x_range[1], n_points) + + self.y = a + np.exp(b * self.x) + self.y += noise * np.random.randn(self.m) + + outliers = np.random.randint(0, self.m, n_outliers) + self.y[outliers] += 50 * noise * np.random.rand(n_outliers) + + self.p_opt = np.array([a, b]) + + def fun(self, p): + return p[0] + np.exp(p[1] * self.x) - self.y + + def jac(self, p): + J = np.empty((self.m, self.n)) + J[:, 0] = 1 + J[:, 1] = self.x * np.exp(p[1] * self.x) + return J + + +def cubic_soft_l1(z): + rho = np.empty((3, z.size)) + + t = 1 + z + rho[0] = 3 * (t**(1/3) - 1) + rho[1] = t ** (-2/3) + rho[2] = -2/3 * t**(-5/3) + + return rho + + +LOSSES = list(IMPLEMENTED_LOSSES.keys()) + [cubic_soft_l1] + + +class 
BaseMixin: + def test_basic(self): + # Test that the basic calling sequence works. + res = least_squares(fun_trivial, 2., method=self.method) + assert_allclose(res.x, 0, atol=1e-4) + assert_allclose(res.fun, fun_trivial(res.x)) + + def test_args_kwargs(self): + # Test that args and kwargs are passed correctly to the functions. + a = 3.0 + for jac in ['2-point', '3-point', 'cs', jac_trivial]: + with suppress_warnings() as sup: + sup.filter( + UserWarning, + "jac='(3-point|cs)' works equivalently to '2-point' for method='lm'" + ) + res = least_squares(fun_trivial, 2.0, jac, args=(a,), + method=self.method) + res1 = least_squares(fun_trivial, 2.0, jac, kwargs={'a': a}, + method=self.method) + + assert_allclose(res.x, a, rtol=1e-4) + assert_allclose(res1.x, a, rtol=1e-4) + + assert_raises(TypeError, least_squares, fun_trivial, 2.0, + args=(3, 4,), method=self.method) + assert_raises(TypeError, least_squares, fun_trivial, 2.0, + kwargs={'kaboom': 3}, method=self.method) + + def test_jac_options(self): + for jac in ['2-point', '3-point', 'cs', jac_trivial]: + with suppress_warnings() as sup: + sup.filter( + UserWarning, + "jac='(3-point|cs)' works equivalently to '2-point' for method='lm'" + ) + res = least_squares(fun_trivial, 2.0, jac, method=self.method) + assert_allclose(res.x, 0, atol=1e-4) + + assert_raises(ValueError, least_squares, fun_trivial, 2.0, jac='oops', + method=self.method) + + def test_nfev_options(self): + for max_nfev in [None, 20]: + res = least_squares(fun_trivial, 2.0, max_nfev=max_nfev, + method=self.method) + assert_allclose(res.x, 0, atol=1e-4) + + def test_x_scale_options(self): + for x_scale in [1.0, np.array([0.5]), 'jac']: + res = least_squares(fun_trivial, 2.0, x_scale=x_scale) + assert_allclose(res.x, 0) + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, x_scale='auto', method=self.method) + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, x_scale=-1.0, method=self.method) + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, x_scale=None, method=self.method) + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, x_scale=1.0+2.0j, method=self.method) + + def test_diff_step(self): + # res1 and res2 should be equivalent. + # res2 and res3 should be different. + res1 = least_squares(fun_trivial, 2.0, diff_step=1e-1, + method=self.method) + res2 = least_squares(fun_trivial, 2.0, diff_step=-1e-1, + method=self.method) + res3 = least_squares(fun_trivial, 2.0, + diff_step=None, method=self.method) + assert_allclose(res1.x, 0, atol=1e-4) + assert_allclose(res2.x, 0, atol=1e-4) + assert_allclose(res3.x, 0, atol=1e-4) + assert_equal(res1.x, res2.x) + assert_equal(res1.nfev, res2.nfev) + + def test_incorrect_options_usage(self): + assert_raises(TypeError, least_squares, fun_trivial, 2.0, + method=self.method, options={'no_such_option': 100}) + assert_raises(TypeError, least_squares, fun_trivial, 2.0, + method=self.method, options={'max_nfev': 100}) + + def test_full_result(self): + # MINPACK doesn't work very well with factor=100 on this problem, + # thus using low 'atol'. 
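+        # At the solution x = 0 the residual is fun_trivial(0) = 5, so the + # expected cost is 0.5 * sum(f**2) = 0.5 * 25 = 12.5, as asserted below.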
+ res = least_squares(fun_trivial, 2.0, method=self.method) + assert_allclose(res.x, 0, atol=1e-4) + assert_allclose(res.cost, 12.5) + assert_allclose(res.fun, 5) + assert_allclose(res.jac, 0, atol=1e-4) + assert_allclose(res.grad, 0, atol=1e-2) + assert_allclose(res.optimality, 0, atol=1e-2) + assert_equal(res.active_mask, 0) + if self.method == 'lm': + assert_(res.nfev < 30) + assert_(res.njev is None) + else: + assert_(res.nfev < 10) + assert_(res.njev < 10) + assert_(res.status > 0) + assert_(res.success) + + def test_full_result_single_fev(self): + # MINPACK checks the number of nfev after the iteration, + # so it's hard to tell what it is going to compute. + if self.method == 'lm': + return + + res = least_squares(fun_trivial, 2.0, method=self.method, + max_nfev=1) + assert_equal(res.x, np.array([2])) + assert_equal(res.cost, 40.5) + assert_equal(res.fun, np.array([9])) + assert_equal(res.jac, np.array([[4]])) + assert_equal(res.grad, np.array([36])) + assert_equal(res.optimality, 36) + assert_equal(res.active_mask, np.array([0])) + assert_equal(res.nfev, 1) + assert_equal(res.njev, 1) + assert_equal(res.status, 0) + assert_equal(res.success, 0) + + def test_rosenbrock(self): + x0 = [-2, 1] + x_opt = [1, 1] + for jac, x_scale, tr_solver in product( + ['2-point', '3-point', 'cs', jac_rosenbrock], + [1.0, np.array([1.0, 0.2]), 'jac'], + ['exact', 'lsmr']): + with suppress_warnings() as sup: + sup.filter( + UserWarning, + "jac='(3-point|cs)' works equivalently to '2-point' for method='lm'" + ) + res = least_squares(fun_rosenbrock, x0, jac, x_scale=x_scale, + tr_solver=tr_solver, method=self.method) + assert_allclose(res.x, x_opt) + + def test_rosenbrock_cropped(self): + x0 = [-2, 1] + if self.method == 'lm': + assert_raises(ValueError, least_squares, fun_rosenbrock_cropped, + x0, method='lm') + else: + for jac, x_scale, tr_solver in product( + ['2-point', '3-point', 'cs', jac_rosenbrock_cropped], + [1.0, np.array([1.0, 0.2]), 'jac'], + ['exact', 'lsmr']): + res = least_squares( + fun_rosenbrock_cropped, x0, jac, x_scale=x_scale, + tr_solver=tr_solver, method=self.method) + assert_allclose(res.cost, 0, atol=1e-14) + + def test_fun_wrong_dimensions(self): + assert_raises(ValueError, least_squares, fun_wrong_dimensions, + 2.0, method=self.method) + + def test_jac_wrong_dimensions(self): + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, jac_wrong_dimensions, method=self.method) + + def test_fun_and_jac_inconsistent_dimensions(self): + x0 = [1, 2] + assert_raises(ValueError, least_squares, fun_rosenbrock, x0, + jac_rosenbrock_bad_dim, method=self.method) + + def test_x0_multidimensional(self): + x0 = np.ones(4).reshape(2, 2) + assert_raises(ValueError, least_squares, fun_trivial, x0, + method=self.method) + + def test_x0_complex_scalar(self): + x0 = 2.0 + 0.0*1j + assert_raises(ValueError, least_squares, fun_trivial, x0, + method=self.method) + + def test_x0_complex_array(self): + x0 = [1.0, 2.0 + 0.0*1j] + assert_raises(ValueError, least_squares, fun_trivial, x0, + method=self.method) + + def test_bvp(self): + # This test was introduced with fix #5556. It turned out that + # the dogbox solver had a bug with trust-region radius update, which + # could block its progress and create an infinite loop. This + # discrete boundary value problem is the one that triggers it. + n = 10 + x0 = np.ones(n**2) + if self.method == 'lm': + max_nfev = 5000 # To account for Jacobian estimation.
+ else: + max_nfev = 100 + res = least_squares(fun_bvp, x0, ftol=1e-2, method=self.method, + max_nfev=max_nfev) + + assert_(res.nfev < max_nfev) + assert_(res.cost < 0.5) + + def test_error_raised_when_all_tolerances_below_eps(self): + # Test that disabling all tolerances (ftol, xtol, gtol all None) + # is not allowed. + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + method=self.method, ftol=None, xtol=None, gtol=None) + + def test_convergence_with_only_one_tolerance_enabled(self): + if self.method == 'lm': + return # should not do test + x0 = [-2, 1] + x_opt = [1, 1] + for ftol, xtol, gtol in [(1e-8, None, None), + (None, 1e-8, None), + (None, None, 1e-8)]: + res = least_squares(fun_rosenbrock, x0, jac=jac_rosenbrock, + ftol=ftol, gtol=gtol, xtol=xtol, + method=self.method) + assert_allclose(res.x, x_opt) + + +class BoundsMixin: + def test_inconsistent(self): + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + bounds=(10.0, 0.0), method=self.method) + + def test_infeasible(self): + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + bounds=(3., 4), method=self.method) + + def test_wrong_number(self): + assert_raises(ValueError, least_squares, fun_trivial, 2., + bounds=(1., 2, 3), method=self.method) + + def test_inconsistent_shape(self): + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + bounds=(1.0, [2.0, 3.0]), method=self.method) + # 1-D array won't be broadcast + assert_raises(ValueError, least_squares, fun_rosenbrock, [1.0, 2.0], + bounds=([0.0], [3.0, 4.0]), method=self.method) + + def test_in_bounds(self): + for jac in ['2-point', '3-point', 'cs', jac_trivial]: + res = least_squares(fun_trivial, 2.0, jac=jac, + bounds=(-1.0, 3.0), method=self.method) + assert_allclose(res.x, 0.0, atol=1e-4) + assert_equal(res.active_mask, [0]) + assert_(-1 <= res.x <= 3) + res = least_squares(fun_trivial, 2.0, jac=jac, + bounds=(0.5, 3.0), method=self.method) + assert_allclose(res.x, 0.5, atol=1e-4) + assert_equal(res.active_mask, [-1]) + assert_(0.5 <= res.x <= 3) + + def test_bounds_shape(self): + def get_bounds_direct(lb, ub): + return lb, ub + + def get_bounds_instances(lb, ub): + return Bounds(lb, ub) + + for jac in ['2-point', '3-point', 'cs', jac_2d_trivial]: + for bounds_func in [get_bounds_direct, get_bounds_instances]: + x0 = [1.0, 1.0] + res = least_squares(fun_2d_trivial, x0, jac=jac) + assert_allclose(res.x, [0.0, 0.0]) + res = least_squares(fun_2d_trivial, x0, jac=jac, + bounds=bounds_func(0.5, [2.0, 2.0]), + method=self.method) + assert_allclose(res.x, [0.5, 0.5]) + res = least_squares(fun_2d_trivial, x0, jac=jac, + bounds=bounds_func([0.3, 0.2], 3.0), + method=self.method) + assert_allclose(res.x, [0.3, 0.2]) + res = least_squares( + fun_2d_trivial, x0, jac=jac, + bounds=bounds_func([-1, 0.5], [1.0, 3.0]), + method=self.method) + assert_allclose(res.x, [0.0, 0.5], atol=1e-5) + + def test_bounds_instances(self): + res = least_squares(fun_trivial, 0.5, bounds=Bounds()) + assert_allclose(res.x, 0.0, atol=1e-4) + + res = least_squares(fun_trivial, 3.0, bounds=Bounds(lb=1.0)) + assert_allclose(res.x, 1.0, atol=1e-4) + + res = least_squares(fun_trivial, 0.5, bounds=Bounds(lb=-1.0, ub=1.0)) + assert_allclose(res.x, 0.0, atol=1e-4) + + res = least_squares(fun_trivial, -3.0, bounds=Bounds(ub=-1.0)) + assert_allclose(res.x, -1.0, atol=1e-4) + + res = least_squares(fun_2d_trivial, [0.5, 0.5], + bounds=Bounds(lb=[-1.0, -1.0], ub=1.0)) + assert_allclose(res.x, [0.0, 0.0], atol=1e-5) + + res = least_squares(fun_2d_trivial, [0.5, 0.5], + bounds=Bounds(lb=[0.1, 0.1])) +
assert_allclose(res.x, [0.1, 0.1], atol=1e-5) + + @pytest.mark.fail_slow(5) + def test_rosenbrock_bounds(self): + x0_1 = np.array([-2.0, 1.0]) + x0_2 = np.array([2.0, 2.0]) + x0_3 = np.array([-2.0, 2.0]) + x0_4 = np.array([0.0, 2.0]) + x0_5 = np.array([-1.2, 1.0]) + problems = [ + (x0_1, ([-np.inf, -1.5], np.inf)), + (x0_2, ([-np.inf, 1.5], np.inf)), + (x0_3, ([-np.inf, 1.5], np.inf)), + (x0_4, ([-np.inf, 1.5], [1.0, np.inf])), + (x0_2, ([1.0, 1.5], [3.0, 3.0])), + (x0_5, ([-50.0, 0.0], [0.5, 100])) + ] + for x0, bounds in problems: + for jac, x_scale, tr_solver in product( + ['2-point', '3-point', 'cs', jac_rosenbrock], + [1.0, [1.0, 0.5], 'jac'], + ['exact', 'lsmr']): + res = least_squares(fun_rosenbrock, x0, jac, bounds, + x_scale=x_scale, tr_solver=tr_solver, + method=self.method) + assert_allclose(res.optimality, 0.0, atol=1e-5) + + +class SparseMixin: + def test_exact_tr_solver(self): + p = BroydenTridiagonal() + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + tr_solver='exact', method=self.method) + assert_raises(ValueError, least_squares, p.fun, p.x0, + tr_solver='exact', jac_sparsity=p.sparsity, + method=self.method) + + def test_equivalence(self): + sparse = BroydenTridiagonal(mode='sparse') + dense = BroydenTridiagonal(mode='dense') + res_sparse = least_squares( + sparse.fun, sparse.x0, jac=sparse.jac, + method=self.method) + res_dense = least_squares( + dense.fun, dense.x0, jac=sparse.jac, + method=self.method) + assert_equal(res_sparse.nfev, res_dense.nfev) + assert_allclose(res_sparse.x, res_dense.x, atol=1e-20) + assert_allclose(res_sparse.cost, 0, atol=1e-20) + assert_allclose(res_dense.cost, 0, atol=1e-20) + + def test_tr_options(self): + p = BroydenTridiagonal() + res = least_squares(p.fun, p.x0, p.jac, method=self.method, + tr_options={'btol': 1e-10}) + assert_allclose(res.cost, 0, atol=1e-20) + + def test_wrong_parameters(self): + p = BroydenTridiagonal() + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + tr_solver='best', method=self.method) + assert_raises(TypeError, least_squares, p.fun, p.x0, p.jac, + tr_solver='lsmr', tr_options={'tol': 1e-10}) + + def test_solver_selection(self): + sparse = BroydenTridiagonal(mode='sparse') + dense = BroydenTridiagonal(mode='dense') + res_sparse = least_squares(sparse.fun, sparse.x0, jac=sparse.jac, + method=self.method) + res_dense = least_squares(dense.fun, dense.x0, jac=dense.jac, + method=self.method) + assert_allclose(res_sparse.cost, 0, atol=1e-20) + assert_allclose(res_dense.cost, 0, atol=1e-20) + assert_(issparse(res_sparse.jac)) + assert_(isinstance(res_dense.jac, np.ndarray)) + + def test_numerical_jac(self): + p = BroydenTridiagonal() + for jac in ['2-point', '3-point', 'cs']: + res_dense = least_squares(p.fun, p.x0, jac, method=self.method) + res_sparse = least_squares( + p.fun, p.x0, jac, method=self.method, + jac_sparsity=p.sparsity) + assert_equal(res_dense.nfev, res_sparse.nfev) + assert_allclose(res_dense.x, res_sparse.x, atol=1e-20) + assert_allclose(res_dense.cost, 0, atol=1e-20) + assert_allclose(res_sparse.cost, 0, atol=1e-20) + + @pytest.mark.fail_slow(5) + def test_with_bounds(self): + p = BroydenTridiagonal() + for jac, jac_sparsity in product( + [p.jac, '2-point', '3-point', 'cs'], [None, p.sparsity]): + res_1 = least_squares( + p.fun, p.x0, jac, bounds=(p.lb, np.inf), + method=self.method, jac_sparsity=jac_sparsity) + res_2 = least_squares( + p.fun, p.x0, jac, bounds=(-np.inf, p.ub), + method=self.method, jac_sparsity=jac_sparsity) + res_3 = least_squares( + p.fun, p.x0, jac, bounds=(p.lb, p.ub), + method=self.method, jac_sparsity=jac_sparsity) + assert_allclose(res_1.optimality, 0, atol=1e-10) + assert_allclose(res_2.optimality, 0, atol=1e-10) + assert_allclose(res_3.optimality, 0, atol=1e-10) + + def test_wrong_jac_sparsity(self): + p = BroydenTridiagonal() + sparsity = p.sparsity[:-1] + assert_raises(ValueError, least_squares, p.fun, p.x0, + jac_sparsity=sparsity, method=self.method) + + def test_linear_operator(self): + p = BroydenTridiagonal(mode='operator') + res = least_squares(p.fun, p.x0, p.jac, method=self.method) + assert_allclose(res.cost, 0.0, atol=1e-20) + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + method=self.method, tr_solver='exact') + + def test_x_scale_jac_scale(self): + p = BroydenTridiagonal() + res = least_squares(p.fun, p.x0, p.jac, method=self.method, + x_scale='jac') + assert_allclose(res.cost, 0.0, atol=1e-20) + + p = BroydenTridiagonal(mode='operator') + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + method=self.method, x_scale='jac') + + +class LossFunctionMixin: + def test_options(self): + for loss in LOSSES: + res = least_squares(fun_trivial, 2.0, loss=loss, + method=self.method) + assert_allclose(res.x, 0, atol=1e-15) + + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + loss='hinge', method=self.method) + + def test_fun(self): + # Test that res.fun contains the actual residuals and is not + # modified by the loss function. + for loss in LOSSES: + res = least_squares(fun_trivial, 2.0, loss=loss, + method=self.method) + assert_equal(res.fun, fun_trivial(res.x)) + + def test_grad(self): + # Test that res.grad is the true gradient of the loss function at the + # solution. Use max_nfev = 1 to avoid reaching the minimum. + x = np.array([2.0]) # res.x will be this. + + res = least_squares(fun_trivial, x, jac_trivial, loss='linear', + max_nfev=1, method=self.method) + assert_equal(res.grad, 2 * x * (x**2 + 5)) + + res = least_squares(fun_trivial, x, jac_trivial, loss='huber', + max_nfev=1, method=self.method) + assert_equal(res.grad, 2 * x) + + res = least_squares(fun_trivial, x, jac_trivial, loss='soft_l1', + max_nfev=1, method=self.method) + assert_allclose(res.grad, + 2 * x * (x**2 + 5) / (1 + (x**2 + 5)**2)**0.5) + + res = least_squares(fun_trivial, x, jac_trivial, loss='cauchy', + max_nfev=1, method=self.method) + assert_allclose(res.grad, 2 * x * (x**2 + 5) / (1 + (x**2 + 5)**2)) + + res = least_squares(fun_trivial, x, jac_trivial, loss='arctan', + max_nfev=1, method=self.method) + assert_allclose(res.grad, 2 * x * (x**2 + 5) / (1 + (x**2 + 5)**4)) + + res = least_squares(fun_trivial, x, jac_trivial, loss=cubic_soft_l1, + max_nfev=1, method=self.method) + assert_allclose(res.grad, + 2 * x * (x**2 + 5) / (1 + (x**2 + 5)**2)**(2/3)) + + def test_jac(self): + # Test that res.jac.T.dot(res.jac) gives the Gauss-Newton approximation + # of the Hessian. This approximation is computed by doubly differentiating + # the cost function and dropping the part containing the second derivative + # of f. For a scalar function it is computed as + # H = (rho' + 2 * rho'' * f**2) * f'**2; if the expression inside the + # brackets is less than EPS, it is replaced by EPS. Here, we check + # against the root of H. + + x = 2.0 # res.x will be this. + f = x**2 + 5 # res.fun will be this.
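+        # Worked instance of the formula above for the 'linear' loss checked + # first below: rho(z) = z, so rho' = 1 and rho'' = 0, giving + # H = f'**2 = (2 * x)**2 and sqrt(H) = 2 * x.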
+ + res = least_squares(fun_trivial, x, jac_trivial, loss='linear', + max_nfev=1, method=self.method) + assert_equal(res.jac, 2 * x) + + # For `huber` loss the Jacobian correction is identically zero + # in the outlier region; in such cases it is modified to equal EPS**0.5. + res = least_squares(fun_trivial, x, jac_trivial, loss='huber', + max_nfev=1, method=self.method) + assert_equal(res.jac, 2 * x * EPS**0.5) + + # Now, let's apply `f_scale` to turn the residual into an inlier. + # The loss function becomes linear. + res = least_squares(fun_trivial, x, jac_trivial, loss='huber', + f_scale=10, max_nfev=1) + assert_equal(res.jac, 2 * x) + + # 'soft_l1' always gives a positive scaling. + res = least_squares(fun_trivial, x, jac_trivial, loss='soft_l1', + max_nfev=1, method=self.method) + assert_allclose(res.jac, 2 * x * (1 + f**2)**-0.75) + + # For 'cauchy' the correction term turns out to be negative, and it + # is replaced by EPS**0.5. + res = least_squares(fun_trivial, x, jac_trivial, loss='cauchy', + max_nfev=1, method=self.method) + assert_allclose(res.jac, 2 * x * EPS**0.5) + + # Now use scaling to turn the residual into an inlier. + res = least_squares(fun_trivial, x, jac_trivial, loss='cauchy', + f_scale=10, max_nfev=1, method=self.method) + fs = f / 10 + assert_allclose(res.jac, 2 * x * (1 - fs**2)**0.5 / (1 + fs**2)) + + # 'arctan' gives an outlier. + res = least_squares(fun_trivial, x, jac_trivial, loss='arctan', + max_nfev=1, method=self.method) + assert_allclose(res.jac, 2 * x * EPS**0.5) + + # Turn to inlier. + res = least_squares(fun_trivial, x, jac_trivial, loss='arctan', + f_scale=20.0, max_nfev=1, method=self.method) + fs = f / 20 + assert_allclose(res.jac, 2 * x * (1 - 3 * fs**4)**0.5 / (1 + fs**4)) + + # cubic_soft_l1 will give an outlier. + res = least_squares(fun_trivial, x, jac_trivial, loss=cubic_soft_l1, + max_nfev=1) + assert_allclose(res.jac, 2 * x * EPS**0.5) + + # Turn to inlier.
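+        # With f_scale=6 the scaled residual is fs = f / 6 = 1.5, so fs**2 < 3 + # and the bracketed term (1 - fs**2 / 3) * (1 + fs**2)**(-5/3) stays + # positive; the assertion below checks its square root times f' = 2 * x.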
+ res = least_squares(fun_trivial, x, jac_trivial, + loss=cubic_soft_l1, f_scale=6, max_nfev=1) + fs = f / 6 + assert_allclose(res.jac, + 2 * x * (1 - fs**2 / 3)**0.5 * (1 + fs**2)**(-5/6)) + + def test_robustness(self): + for noise in [0.1, 1.0]: + p = ExponentialFittingProblem(1, 0.1, noise, random_seed=0) + + for jac in ['2-point', '3-point', 'cs', p.jac]: + res_lsq = least_squares(p.fun, p.p0, jac=jac, + method=self.method) + assert_allclose(res_lsq.optimality, 0, atol=1e-2) + for loss in LOSSES: + if loss == 'linear': + continue + res_robust = least_squares( + p.fun, p.p0, jac=jac, loss=loss, f_scale=noise, + method=self.method) + assert_allclose(res_robust.optimality, 0, atol=1e-2) + assert_(norm(res_robust.x - p.p_opt) < + norm(res_lsq.x - p.p_opt)) + + +class TestDogbox(BaseMixin, BoundsMixin, SparseMixin, LossFunctionMixin): + method = 'dogbox' + + +class TestTRF(BaseMixin, BoundsMixin, SparseMixin, LossFunctionMixin): + method = 'trf' + + def test_lsmr_regularization(self): + p = BroydenTridiagonal() + for regularize in [True, False]: + res = least_squares(p.fun, p.x0, p.jac, method='trf', + tr_options={'regularize': regularize}) + assert_allclose(res.cost, 0, atol=1e-20) + + +class TestLM(BaseMixin): + method = 'lm' + + def test_bounds_not_supported(self): + assert_raises(ValueError, least_squares, fun_trivial, + 2.0, bounds=(-3.0, 3.0), method='lm') + + def test_m_less_n_not_supported(self): + x0 = [-2, 1] + assert_raises(ValueError, least_squares, fun_rosenbrock_cropped, x0, + method='lm') + + def test_sparse_not_supported(self): + p = BroydenTridiagonal() + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + method='lm') + + def test_jac_sparsity_not_supported(self): + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + jac_sparsity=[1], method='lm') + + def test_LinearOperator_not_supported(self): + p = BroydenTridiagonal(mode="operator") + assert_raises(ValueError, least_squares, p.fun, p.x0, p.jac, + method='lm') + + def test_loss(self): + res = least_squares(fun_trivial, 2.0, loss='linear', method='lm') + assert_allclose(res.x, 0.0, atol=1e-4) + + assert_raises(ValueError, least_squares, fun_trivial, 2.0, + method='lm', loss='huber') + + +def test_basic(): + # test that 'method' arg is really optional + res = least_squares(fun_trivial, 2.0) + assert_allclose(res.x, 0, atol=1e-10) + + +def test_small_tolerances_for_lm(): + for ftol, xtol, gtol in [(None, 1e-13, 1e-13), + (1e-13, None, 1e-13), + (1e-13, 1e-13, None)]: + assert_raises(ValueError, least_squares, fun_trivial, 2.0, xtol=xtol, + ftol=ftol, gtol=gtol, method='lm') + + +def test_fp32_gh12991(): + # checks that smaller FP sizes can be used in least_squares + # this is the minimum working example reported for gh12991 + np.random.seed(1) + + x = np.linspace(0, 1, 100).astype("float32") + y = np.random.random(100).astype("float32") + + def func(p, x): + return p[0] + p[1] * x + + def err(p, x, y): + return func(p, x) - y + + res = least_squares(err, [-1.0, -1.0], args=(x, y)) + # Previously the initial Jacobian calculated for this would be all 0, + # so the minimization would terminate immediately with nfev=1 and + # report a successful minimization (which it shouldn't have), while + # remaining unchanged from the initial solution. + # It was terminating early because the underlying approx_derivative + # used a step size for FP64 when the working space was FP32.
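+    # nfev > 2 confirms the solver actually iterated instead of stopping + # at the (previously all-zero) initial Jacobian.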
+ assert res.nfev > 2 + assert_allclose(res.x, np.array([0.4082241, 0.15530563]), atol=5e-5) + + +def test_gh_18793_and_19351(): + answer = 1e-12 + initial_guess = 1.1e-12 + + def chi2(x): + return (x-answer)**2 + + gtol = 1e-15 + res = least_squares(chi2, x0=initial_guess, gtol=1e-15, bounds=(0, np.inf)) + # Original motivation: gh-18793 + # if we choose an initial condition that is close to the solution + # we shouldn't return an answer that is further away from the solution + + # Update: gh-19351 + # However this requirement does not go well with 'trf' algorithm logic. + # Some regressions were reported after the presumed fix. + # The returned solution is good as long as it satisfies the convergence + # conditions. + # Specifically in this case the scaled gradient will be sufficiently low. + + scaling, _ = CL_scaling_vector(res.x, res.grad, + np.atleast_1d(0), np.atleast_1d(np.inf)) + assert res.status == 1 # Converged by gradient + assert np.linalg.norm(res.grad * scaling, ord=np.inf) < gtol + + +def test_gh_19103(): + # Checks that least_squares trf method selects a strictly feasible point, + # and thus succeeds instead of failing, + # when the initial guess is reported exactly at a boundary point. + # This is a reduced example from gh191303 + + ydata = np.array([0.] * 66 + [ + 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., + 1., 1., 1., 0., 0., 0., 1., 0., 0., 2., 1., + 0., 3., 1., 6., 5., 0., 0., 2., 8., 4., 4., + 6., 9., 7., 2., 7., 8., 2., 13., 9., 8., 11., + 10., 13., 14., 19., 11., 15., 18., 26., 19., 32., 29., + 28., 36., 32., 35., 36., 43., 52., 32., 58., 56., 52., + 67., 53., 72., 88., 77., 95., 94., 84., 86., 101., 107., + 108., 118., 96., 115., 138., 137., + ]) + xdata = np.arange(0, ydata.size) * 0.1 + + def exponential_wrapped(params): + A, B, x0 = params + return A * np.exp(B * (xdata - x0)) - ydata + + x0 = [0.01, 1., 5.] + bounds = ((0.01, 0, 0), (np.inf, 10, 20.9)) + res = least_squares(exponential_wrapped, x0, method='trf', bounds=bounds) + assert res.success diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linear_assignment.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linear_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..d59792da9eef38e313eaa0bca70f873627f8d3cf --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linear_assignment.py @@ -0,0 +1,116 @@ +# Author: Brian M. Clapper, G. 
Varoquaux, Lars Buitinck +# License: BSD + +from numpy.testing import assert_array_equal +import pytest + +import numpy as np + +from scipy.optimize import linear_sum_assignment +from scipy.sparse import random +from scipy.sparse._sputils import matrix +from scipy.sparse.csgraph import min_weight_full_bipartite_matching +from scipy.sparse.csgraph.tests.test_matching import ( + linear_sum_assignment_assertions, linear_sum_assignment_test_cases +) + + +def test_linear_sum_assignment_input_shape(): + with pytest.raises(ValueError, match="expected a matrix"): + linear_sum_assignment([1, 2, 3]) + + +def test_linear_sum_assignment_input_object(): + C = [[1, 2, 3], [4, 5, 6]] + assert_array_equal(linear_sum_assignment(C), + linear_sum_assignment(np.asarray(C))) + assert_array_equal(linear_sum_assignment(C), + linear_sum_assignment(matrix(C))) + + +def test_linear_sum_assignment_input_bool(): + I = np.identity(3) + assert_array_equal(linear_sum_assignment(I.astype(np.bool_)), + linear_sum_assignment(I)) + + +def test_linear_sum_assignment_input_string(): + I = np.identity(3) + with pytest.raises(TypeError, match="Cannot cast array data"): + linear_sum_assignment(I.astype(str)) + + +def test_linear_sum_assignment_input_nan(): + I = np.diag([np.nan, 1, 1]) + with pytest.raises(ValueError, match="contains invalid numeric entries"): + linear_sum_assignment(I) + + +def test_linear_sum_assignment_input_neginf(): + I = np.diag([1, -np.inf, 1]) + with pytest.raises(ValueError, match="contains invalid numeric entries"): + linear_sum_assignment(I) + + +def test_linear_sum_assignment_input_inf(): + I = np.identity(3) + I[:, 0] = np.inf + with pytest.raises(ValueError, match="cost matrix is infeasible"): + linear_sum_assignment(I) + + +def test_constant_cost_matrix(): + # Fixes #11602 + n = 8 + C = np.ones((n, n)) + row_ind, col_ind = linear_sum_assignment(C) + assert_array_equal(row_ind, np.arange(n)) + assert_array_equal(col_ind, np.arange(n)) + + +@pytest.mark.parametrize('num_rows,num_cols', [(0, 0), (2, 0), (0, 3)]) +def test_linear_sum_assignment_trivial_cost(num_rows, num_cols): + C = np.empty(shape=(num_cols, num_rows)) + row_ind, col_ind = linear_sum_assignment(C) + assert len(row_ind) == 0 + assert len(col_ind) == 0 + + +@pytest.mark.parametrize('sign,test_case', linear_sum_assignment_test_cases) +def test_linear_sum_assignment_small_inputs(sign, test_case): + linear_sum_assignment_assertions( + linear_sum_assignment, np.array, sign, test_case) + + +# Tests that combine scipy.optimize.linear_sum_assignment and +# scipy.sparse.csgraph.min_weight_full_bipartite_matching +def test_two_methods_give_same_result_on_many_sparse_inputs(): + # As opposed to the test above, here we do not spell out the expected + # output; only assert that the two methods give the same result. + # Concretely, the below tests 100 cases of size 100x100, out of which + # 36 are infeasible. 
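+    # Both routines solve the same assignment problem, so on feasible + # instances their optimal costs must agree exactly, and on infeasible + # ones both must raise ValueError.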
+ np.random.seed(1234) + for _ in range(100): + lsa_raises = False + mwfbm_raises = False + sparse = random(100, 100, density=0.06, + data_rvs=lambda size: np.random.randint(1, 100, size)) + # In csgraph, zeros correspond to missing edges, so we explicitly + # replace those with infinities. + dense = np.full(sparse.shape, np.inf) + dense[sparse.row, sparse.col] = sparse.data + sparse = sparse.tocsr() + try: + row_ind, col_ind = linear_sum_assignment(dense) + lsa_cost = dense[row_ind, col_ind].sum() + except ValueError: + lsa_raises = True + try: + row_ind, col_ind = min_weight_full_bipartite_matching(sparse) + mwfbm_cost = sparse[row_ind, col_ind].sum() + except ValueError: + mwfbm_raises = True + # Ensure that if one method raises, so does the other one. + assert lsa_raises == mwfbm_raises + if not lsa_raises: + assert lsa_cost == mwfbm_cost diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linprog.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linprog.py new file mode 100644 index 0000000000000000000000000000000000000000..1e304cd038ad37fde7c613b3daf1bd95abdf5547 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_linprog.py @@ -0,0 +1,2498 @@ +""" +Unit tests for linear programming +""" +import sys +import platform + +import numpy as np +from numpy.testing import (assert_, assert_allclose, assert_equal, + assert_array_less, assert_warns, suppress_warnings) +from pytest import raises as assert_raises +from scipy.optimize import linprog, OptimizeWarning +from scipy.optimize._numdiff import approx_derivative +from scipy.sparse.linalg import MatrixRankWarning +from scipy.linalg import LinAlgWarning +from scipy._lib._util import VisibleDeprecationWarning +import scipy.sparse +import pytest + +has_umfpack = True +try: + from scikits.umfpack import UmfpackWarning +except ImportError: + has_umfpack = False + +has_cholmod = True +try: + import sksparse # noqa: F401 + from sksparse.cholmod import cholesky as cholmod # noqa: F401 +except ImportError: + has_cholmod = False + + +def _assert_iteration_limit_reached(res, maxiter): + assert_(not res.success, "Incorrectly reported success") + assert_(res.nit <= maxiter, "Incorrectly reported number of iterations") + assert_equal(res.status, 1, "Failed to report iteration limit reached") + + +def _assert_infeasible(res): + # res: linprog result object + assert_(not res.success, "incorrectly reported success") + assert_equal(res.status, 2, "failed to report infeasible status") + + +def _assert_unbounded(res): + # res: linprog result object + assert_(not res.success, "incorrectly reported success") + assert_equal(res.status, 3, "failed to report unbounded status") + + +def _assert_unable_to_find_basic_feasible_sol(res): + # res: linprog result object + + # The status may be either 2 or 4 depending on why the feasible solution + # could not be found. If the underlying problem is expected not to have a + # feasible solution, _assert_infeasible should be used.
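+    # (Status 2 means infeasible; status 4 means numerical difficulties + # were encountered.)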
+ assert_(not res.success, "incorrectly reported success") + assert_(res.status in (2, 4), "failed to report optimization failure") + + +def _assert_success(res, desired_fun=None, desired_x=None, + rtol=1e-8, atol=1e-8): + # res: linprog result object + # desired_fun: desired objective function value or None + # desired_x: desired solution or None + if not res.success: + msg = f"linprog status {res.status}, message: {res.message}" + raise AssertionError(msg) + + assert_equal(res.status, 0) + if desired_fun is not None: + assert_allclose(res.fun, desired_fun, + err_msg="converged to an unexpected objective value", + rtol=rtol, atol=atol) + if desired_x is not None: + assert_allclose(res.x, desired_x, + err_msg="converged to an unexpected solution", + rtol=rtol, atol=atol) + + +def magic_square(n): + """ + Generates a linear program for which integer solutions represent an + n x n magic square; binary decision variables represent the presence + (or absence) of an integer 1 to n^2 in each position of the square. + """ + + np.random.seed(0) + M = n * (n**2 + 1) / 2 + + numbers = np.arange(n**4) // n**2 + 1 + + numbers = numbers.reshape(n**2, n, n) + + zeros = np.zeros((n**2, n, n)) + + A_list = [] + b_list = [] + + # Rule 1: use every number exactly once + for i in range(n**2): + A_row = zeros.copy() + A_row[i, :, :] = 1 + A_list.append(A_row.flatten()) + b_list.append(1) + + # Rule 2: Only one number per square + for i in range(n): + for j in range(n): + A_row = zeros.copy() + A_row[:, i, j] = 1 + A_list.append(A_row.flatten()) + b_list.append(1) + + # Rule 3: sum of rows is M + for i in range(n): + A_row = zeros.copy() + A_row[:, i, :] = numbers[:, i, :] + A_list.append(A_row.flatten()) + b_list.append(M) + + # Rule 4: sum of columns is M + for i in range(n): + A_row = zeros.copy() + A_row[:, :, i] = numbers[:, :, i] + A_list.append(A_row.flatten()) + b_list.append(M) + + # Rule 5: sum of diagonals is M + A_row = zeros.copy() + A_row[:, range(n), range(n)] = numbers[:, range(n), range(n)] + A_list.append(A_row.flatten()) + b_list.append(M) + A_row = zeros.copy() + A_row[:, range(n), range(-1, -n - 1, -1)] = \ + numbers[:, range(n), range(-1, -n - 1, -1)] + A_list.append(A_row.flatten()) + b_list.append(M) + + A = np.array(np.vstack(A_list), dtype=float) + b = np.array(b_list, dtype=float) + c = np.random.rand(A.shape[1]) + + return A, b, c, numbers, M + + +def lpgen_2d(m, n): + """ -> A b c LP test: m*n vars, m+n constraints + row sums == n/m, col sums == 1 + https://gist.github.com/denis-bz/8647461 + """ + np.random.seed(0) + c = - np.random.exponential(size=(m, n)) + Arow = np.zeros((m, m * n)) + brow = np.zeros(m) + for j in range(m): + j1 = j + 1 + Arow[j, j * n:j1 * n] = 1 + brow[j] = n / m + + Acol = np.zeros((n, m * n)) + bcol = np.zeros(n) + for j in range(n): + j1 = j + 1 + Acol[j, j::n] = 1 + bcol[j] = 1 + + A = np.vstack((Arow, Acol)) + b = np.hstack((brow, bcol)) + + return A, b, c.ravel() + + +def very_random_gen(seed=0): + np.random.seed(seed) + m_eq, m_ub, n = 10, 20, 50 + c = np.random.rand(n)-0.5 + A_ub = np.random.rand(m_ub, n)-0.5 + b_ub = np.random.rand(m_ub)-0.5 + A_eq = np.random.rand(m_eq, n)-0.5 + b_eq = np.random.rand(m_eq)-0.5 + lb = -np.random.rand(n) + ub = np.random.rand(n) + lb[lb < -np.random.rand()] = -np.inf + ub[ub > np.random.rand()] = np.inf + bounds = np.vstack((lb, ub)).T + return c, A_ub, b_ub, A_eq, b_eq, bounds + + +def nontrivial_problem(): + c = [-1, 8, 4, -6] + A_ub = [[-7, -7, 6, 9], + [1, -1, -3, 0], + [10, -10, -7, 7], + [6, -1, 3, 4]] + b_ub = 
[-3, 6, -6, 6] + A_eq = [[-10, 1, 1, -8]] + b_eq = [-4] + x_star = [101 / 1391, 1462 / 1391, 0, 752 / 1391] + f_star = 7083 / 1391 + return c, A_ub, b_ub, A_eq, b_eq, x_star, f_star + + +def l1_regression_prob(seed=0, m=8, d=9, n=100): + ''' + Training data is {(x0, y0), (x1, y1), ..., (xn-1, yn-1)} + x in R^d + y in R + n: number of training samples + d: dimension of x, i.e. x in R^d + phi: feature map R^d -> R^m + m: dimension of feature space + ''' + np.random.seed(seed) + phi = np.random.normal(0, 1, size=(m, d)) # random feature mapping + w_true = np.random.randn(m) + x = np.random.normal(0, 1, size=(d, n)) # features + y = w_true @ (phi @ x) + np.random.normal(0, 1e-5, size=n) # measurements + + # construct the problem + c = np.ones(m+n) + c[:m] = 0 + A_ub = scipy.sparse.lil_matrix((2*n, n+m)) + idx = 0 + for ii in range(n): + A_ub[idx, :m] = phi @ x[:, ii] + A_ub[idx, m+ii] = -1 + A_ub[idx+1, :m] = -1*phi @ x[:, ii] + A_ub[idx+1, m+ii] = -1 + idx += 2 + A_ub = A_ub.tocsc() + b_ub = np.zeros(2*n) + b_ub[0::2] = y + b_ub[1::2] = -y + bnds = [(None, None)]*m + [(0, None)]*n + return c, A_ub, b_ub, bnds + + +def generic_callback_test(self): + # Check that the callback behaves as advertised + last_cb = {} + + def cb(res): + message = res.pop('message') + complete = res.pop('complete') + + assert_(res.pop('phase') in (1, 2)) + assert_(res.pop('status') in range(4)) + assert_(isinstance(res.pop('nit'), int)) + assert_(isinstance(complete, bool)) + assert_(isinstance(message, str)) + + last_cb['x'] = res['x'] + last_cb['fun'] = res['fun'] + last_cb['slack'] = res['slack'] + last_cb['con'] = res['con'] + + c = np.array([-3, -2]) + A_ub = [[2, 1], [1, 1], [1, 0]] + b_ub = [10, 8, 4] + res = linprog(c, A_ub=A_ub, b_ub=b_ub, callback=cb, method=self.method) + + _assert_success(res, desired_fun=-18.0, desired_x=[2, 6]) + assert_allclose(last_cb['fun'], res['fun']) + assert_allclose(last_cb['x'], res['x']) + assert_allclose(last_cb['con'], res['con']) + assert_allclose(last_cb['slack'], res['slack']) + + +def test_unknown_solvers_and_options(): + c = np.array([-3, -2]) + A_ub = [[2, 1], [1, 1], [1, 0]] + b_ub = [10, 8, 4] + + assert_raises(ValueError, linprog, + c, A_ub=A_ub, b_ub=b_ub, method='ekki-ekki-ekki') + assert_raises(ValueError, linprog, + c, A_ub=A_ub, b_ub=b_ub, method='highs-ekki') + message = "Unrecognized options detected: {'rr_method': 'ekki-ekki-ekki'}" + with pytest.warns(OptimizeWarning, match=message): + linprog(c, A_ub=A_ub, b_ub=b_ub, + options={"rr_method": 'ekki-ekki-ekki'}) + + +def test_choose_solver(): + # 'highs' chooses 'dual' + c = np.array([-3, -2]) + A_ub = [[2, 1], [1, 1], [1, 0]] + b_ub = [10, 8, 4] + + res = linprog(c, A_ub, b_ub, method='highs') + _assert_success(res, desired_fun=-18.0, desired_x=[2, 6]) + + +def test_deprecation(): + with pytest.warns(DeprecationWarning): + linprog(1, method='interior-point') + with pytest.warns(DeprecationWarning): + linprog(1, method='revised simplex') + with pytest.warns(DeprecationWarning): + linprog(1, method='simplex') + + +def test_highs_status_message(): + res = linprog(1, method='highs') + msg = "Optimization terminated successfully. (HiGHS Status 7:" + assert res.status == 0 + assert res.message.startswith(msg) + + A, b, c, numbers, M = magic_square(6) + bounds = [(0, 1)] * len(c) + integrality = [1] * len(c) + options = {"time_limit": 0.1} + res = linprog(c=c, A_eq=A, b_eq=b, bounds=bounds, method='highs', + options=options, integrality=integrality) + msg = "Time limit reached. 
(HiGHS Status 13:" + assert res.status == 1 + assert res.message.startswith(msg) + + options = {"maxiter": 10} + res = linprog(c=c, A_eq=A, b_eq=b, bounds=bounds, method='highs-ds', + options=options) + msg = "Iteration limit reached. (HiGHS Status 14:" + assert res.status == 1 + assert res.message.startswith(msg) + + res = linprog(1, bounds=(1, -1), method='highs') + msg = "The problem is infeasible. (HiGHS Status 8:" + assert res.status == 2 + assert res.message.startswith(msg) + + res = linprog(-1, method='highs') + msg = "The problem is unbounded. (HiGHS Status 10:" + assert res.status == 3 + assert res.message.startswith(msg) + + from scipy.optimize._linprog_highs import _highs_to_scipy_status_message + status, message = _highs_to_scipy_status_message(58, "Hello!") + msg = "The HiGHS status code was not recognized. (HiGHS Status 58:" + assert status == 4 + assert message.startswith(msg) + + status, message = _highs_to_scipy_status_message(None, None) + msg = "HiGHS did not provide a status code. (HiGHS Status None: None)" + assert status == 4 + assert message.startswith(msg) + + +def test_bug_17380(): + linprog([1, 1], A_ub=[[-1, 0]], b_ub=[-2.5], integrality=[1, 1]) + + +A_ub = None +b_ub = None +A_eq = None +b_eq = None +bounds = None + +################ +# Common Tests # +################ + + +class LinprogCommonTests: + """ + Base class for `linprog` tests. Generally, each test will be performed + once for every derived class of LinprogCommonTests, each of which will + typically change self.options and/or self.method. Effectively, these tests + are run for many combinations of method (simplex, revised simplex, and + interior point) and options (such as pivoting rule or sparse treatment). + """ + + ################## + # Targeted Tests # + ################## + + def test_callback(self): + generic_callback_test(self) + + def test_disp(self): + # test that display option does not break anything. + A, b, c = lpgen_2d(20, 20) + res = linprog(c, A_ub=A, b_ub=b, method=self.method, + options={"disp": True}) + _assert_success(res, desired_fun=-64.049494229) + + def test_docstring_example(self): + # Example from linprog docstring. 
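+        # This is the docstring problem: minimize -x0 + 4*x1 subject to + # -3*x0 + x1 <= 6, x0 + 2*x1 <= 4 and x1 >= -3; the optimum is + # the objective value -22 (at x = (10, -3)), as asserted below.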
+ c = [-1, 4] + A = [[-3, 1], [1, 2]] + b = [6, 4] + x0_bounds = (None, None) + x1_bounds = (-3, None) + res = linprog(c, A_ub=A, b_ub=b, bounds=(x0_bounds, x1_bounds), + options=self.options, method=self.method) + _assert_success(res, desired_fun=-22) + + def test_type_error(self): + # (presumably) checks that linprog recognizes type errors + # This is tested more carefully in test__linprog_clean_inputs.py + c = [1] + A_eq = [[1]] + b_eq = "hello" + assert_raises(TypeError, linprog, + c, A_eq=A_eq, b_eq=b_eq, + method=self.method, options=self.options) + + def test_aliasing_b_ub(self): + # (presumably) checks that linprog does not modify b_ub + # This is tested more carefully in test__linprog_clean_inputs.py + c = np.array([1.0]) + A_ub = np.array([[1.0]]) + b_ub_orig = np.array([3.0]) + b_ub = b_ub_orig.copy() + bounds = (-4.0, np.inf) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-4, desired_x=[-4]) + assert_allclose(b_ub_orig, b_ub) + + def test_aliasing_b_eq(self): + # (presumably) checks that linprog does not modify b_eq + # This is tested more carefully in test__linprog_clean_inputs.py + c = np.array([1.0]) + A_eq = np.array([[1.0]]) + b_eq_orig = np.array([3.0]) + b_eq = b_eq_orig.copy() + bounds = (-4.0, np.inf) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=3, desired_x=[3]) + assert_allclose(b_eq_orig, b_eq) + + def test_non_ndarray_args(self): + # (presumably) checks that linprog accepts list in place of arrays + # This is tested more carefully in test__linprog_clean_inputs.py + c = [1.0] + A_ub = [[1.0]] + b_ub = [3.0] + A_eq = [[1.0]] + b_eq = [2.0] + bounds = (-1.0, 10.0) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=2, desired_x=[2]) + + def test_unknown_options(self): + c = np.array([-3, -2]) + A_ub = [[2, 1], [1, 1], [1, 0]] + b_ub = [10, 8, 4] + + def f(c, A_ub=None, b_ub=None, A_eq=None, + b_eq=None, bounds=None, options={}): + linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=options) + + o = {key: self.options[key] for key in self.options} + o['spam'] = 42 + + assert_warns(OptimizeWarning, f, + c, A_ub=A_ub, b_ub=b_ub, options=o) + + def test_integrality_without_highs(self): + # ensure that using `integrality` parameter without `method='highs'` + # raises warning and produces correct solution to relaxed problem + # source: https://en.wikipedia.org/wiki/Integer_programming#Example + A_ub = np.array([[-1, 1], [3, 2], [2, 3]]) + b_ub = np.array([1, 12, 12]) + c = -np.array([0, 1]) + + bounds = [(0, np.inf)] * len(c) + integrality = [1] * len(c) + + with np.testing.assert_warns(OptimizeWarning): + res = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, + method=self.method, integrality=integrality) + + np.testing.assert_allclose(res.x, [1.8, 2.8]) + np.testing.assert_allclose(res.fun, -2.8) + + def test_invalid_inputs(self): + + def f(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None, bounds=None): + linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + # Test ill-formatted bounds + assert_raises(ValueError, f, [1, 2, 3], bounds=[(1, 2), (3, 4)]) + with np.testing.suppress_warnings() as sup: + sup.filter(VisibleDeprecationWarning, "Creating an ndarray from ragged") + assert_raises(ValueError, f, [1, 2, 3], bounds=[(1, 2), (3, 4), (3, 4, 5)]) + assert_raises(ValueError, 
f, [1, 2, 3], bounds=[(1, -2), (1, 2)]) + + # Test other invalid inputs + assert_raises(ValueError, f, [1, 2], A_ub=[[1, 2]], b_ub=[1, 2]) + assert_raises(ValueError, f, [1, 2], A_ub=[[1]], b_ub=[1]) + assert_raises(ValueError, f, [1, 2], A_eq=[[1, 2]], b_eq=[1, 2]) + assert_raises(ValueError, f, [1, 2], A_eq=[[1]], b_eq=[1]) + assert_raises(ValueError, f, [1, 2], A_eq=[1], b_eq=1) + + # this last check doesn't make sense for sparse presolve + if ("_sparse_presolve" in self.options and + self.options["_sparse_presolve"]): + return + # there aren't 3-D sparse matrices + + assert_raises(ValueError, f, [1, 2], A_ub=np.zeros((1, 1, 3)), b_eq=1) + + def test_sparse_constraints(self): + # gh-13559: improve error message for sparse inputs when unsupported + def f(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None, bounds=None): + linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + np.random.seed(0) + m = 100 + n = 150 + A_eq = scipy.sparse.rand(m, n, 0.5) + x_valid = np.random.randn(n) + c = np.random.randn(n) + ub = x_valid + np.random.rand(n) + lb = x_valid - np.random.rand(n) + bounds = np.column_stack((lb, ub)) + b_eq = A_eq * x_valid + + if self.method in {'simplex', 'revised simplex'}: + # simplex and revised simplex should raise error + with assert_raises(ValueError, match=f"Method '{self.method}' " + "does not support sparse constraint matrices."): + linprog(c=c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, + method=self.method, options=self.options) + else: + # other methods should succeed + options = {**self.options} + if self.method in {'interior-point'}: + options['sparse'] = True + + res = linprog(c=c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, + method=self.method, options=options) + assert res.success + + def test_maxiter(self): + # test iteration limit w/ Enzo example + c = [4, 8, 3, 0, 0, 0] + A = [ + [2, 5, 3, -1, 0, 0], + [3, 2.5, 8, 0, -1, 0], + [8, 10, 4, 0, 0, -1]] + b = [185, 155, 600] + np.random.seed(0) + maxiter = 3 + res = linprog(c, A_eq=A, b_eq=b, method=self.method, + options={"maxiter": maxiter}) + _assert_iteration_limit_reached(res, maxiter) + assert_equal(res.nit, maxiter) + + def test_bounds_fixed(self): + + # Test fixed bounds (upper equal to lower) + # If presolve option True, test if solution found in presolve (i.e. + # number of iterations is 0). + do_presolve = self.options.get('presolve', True) + + res = linprog([1], bounds=(1, 1), + method=self.method, options=self.options) + _assert_success(res, 1, 1) + if do_presolve: + assert_equal(res.nit, 0) + + res = linprog([1, 2, 3], bounds=[(5, 5), (-1, -1), (3, 3)], + method=self.method, options=self.options) + _assert_success(res, 12, [5, -1, 3]) + if do_presolve: + assert_equal(res.nit, 0) + + res = linprog([1, 1], bounds=[(1, 1), (1, 3)], + method=self.method, options=self.options) + _assert_success(res, 2, [1, 1]) + if do_presolve: + assert_equal(res.nit, 0) + + res = linprog([1, 1, 2], A_eq=[[1, 0, 0], [0, 1, 0]], b_eq=[1, 7], + bounds=[(-5, 5), (0, 10), (3.5, 3.5)], + method=self.method, options=self.options) + _assert_success(res, 15, [1, 7, 3.5]) + if do_presolve: + assert_equal(res.nit, 0) + + def test_bounds_infeasible(self): + + # Test ill-valued bounds (upper less than lower) + # If presolve option True, test if solution found in presolve (i.e. + # number of iterations is 0). 
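+        # With an upper bound strictly below the lower bound the feasible set + # is empty, so presolve can declare infeasibility without a single + # iteration, which is what the nit == 0 checks verify.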
+ do_presolve = self.options.get('presolve', True) + + res = linprog([1], bounds=(1, -2), method=self.method, options=self.options) + _assert_infeasible(res) + if do_presolve: + assert_equal(res.nit, 0) + + res = linprog([1], bounds=[(1, -2)], method=self.method, options=self.options) + _assert_infeasible(res) + if do_presolve: + assert_equal(res.nit, 0) + + res = linprog([1, 2, 3], bounds=[(5, 0), (1, 2), (3, 4)], + method=self.method, options=self.options) + _assert_infeasible(res) + if do_presolve: + assert_equal(res.nit, 0) + + def test_bounds_infeasible_2(self): + + # Test ill-valued bounds (lower inf, upper -inf) + # If presolve option True, test if solution found in presolve (i.e. + # number of iterations is 0). + # For the simplex method, the cases do not result in an + # infeasible status, but in a RuntimeWarning. This is a + # consequence of having _presolve() take care of feasibility + # checks. See issue gh-11618. + do_presolve = self.options.get('presolve', True) + simplex_without_presolve = not do_presolve and self.method == 'simplex' + + c = [1, 2, 3] + bounds_1 = [(1, 2), (np.inf, np.inf), (3, 4)] + bounds_2 = [(1, 2), (-np.inf, -np.inf), (3, 4)] + + if simplex_without_presolve: + def g(c, bounds): + res = linprog(c, bounds=bounds, + method=self.method, options=self.options) + return res + + with pytest.warns(RuntimeWarning): + with pytest.raises(IndexError): + g(c, bounds=bounds_1) + + with pytest.warns(RuntimeWarning): + with pytest.raises(IndexError): + g(c, bounds=bounds_2) + else: + res = linprog(c=c, bounds=bounds_1, + method=self.method, options=self.options) + _assert_infeasible(res) + if do_presolve: + assert_equal(res.nit, 0) + res = linprog(c=c, bounds=bounds_2, + method=self.method, options=self.options) + _assert_infeasible(res) + if do_presolve: + assert_equal(res.nit, 0) + + def test_empty_constraint_1(self): + c = [-1, -2] + res = linprog(c, method=self.method, options=self.options) + _assert_unbounded(res) + + def test_empty_constraint_2(self): + c = [-1, 1, -1, 1] + bounds = [(0, np.inf), (-np.inf, 0), (-1, 1), (-1, 1)] + res = linprog(c, bounds=bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + # Unboundedness detected in presolve requires no iterations + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_empty_constraint_3(self): + c = [1, -1, 1, -1] + bounds = [(0, np.inf), (-np.inf, 0), (-1, 1), (-1, 1)] + res = linprog(c, bounds=bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[0, 0, -1, 1], desired_fun=-2) + + def test_inequality_constraints(self): + # Minimize linear function subject to linear inequality constraints. + # http://www.dam.brown.edu/people/huiwang/classes/am121/Archive/simplex_121_c.pdf + c = np.array([3, 2]) * -1 # maximize + A_ub = [[2, 1], + [1, 1], + [1, 0]] + b_ub = [10, 8, 4] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-18, desired_x=[2, 6]) + + def test_inequality_constraints2(self): + # Minimize linear function subject to linear inequality constraints. 
+ # http://www.statslab.cam.ac.uk/~ff271/teaching/opt/notes/notes8.pdf + # (dead link) + c = [6, 3] + A_ub = [[0, 3], + [-1, -1], + [-2, 1]] + b_ub = [2, -1, -1] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=5, desired_x=[2 / 3, 1 / 3]) + + def test_bounds_simple(self): + c = [1, 2] + bounds = (1, 2) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[1, 1]) + + bounds = [(1, 2), (1, 2)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[1, 1]) + + def test_bounded_below_only_1(self): + c = np.array([1.0]) + A_eq = np.array([[1.0]]) + b_eq = np.array([3.0]) + bounds = (1.0, None) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=3, desired_x=[3]) + + def test_bounded_below_only_2(self): + c = np.ones(3) + A_eq = np.eye(3) + b_eq = np.array([1, 2, 3]) + bounds = (0.5, np.inf) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=b_eq, desired_fun=np.sum(b_eq)) + + def test_bounded_above_only_1(self): + c = np.array([1.0]) + A_eq = np.array([[1.0]]) + b_eq = np.array([3.0]) + bounds = (None, 10.0) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=3, desired_x=[3]) + + def test_bounded_above_only_2(self): + c = np.ones(3) + A_eq = np.eye(3) + b_eq = np.array([1, 2, 3]) + bounds = (-np.inf, 4) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=b_eq, desired_fun=np.sum(b_eq)) + + def test_bounds_infinity(self): + c = np.ones(3) + A_eq = np.eye(3) + b_eq = np.array([1, 2, 3]) + bounds = (-np.inf, np.inf) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=b_eq, desired_fun=np.sum(b_eq)) + + def test_bounds_mixed(self): + # Problem has one unbounded variable and + # another with a negative lower bound. + c = np.array([-1, 4]) * -1 # maximize + A_ub = np.array([[-3, 1], + [1, 2]], dtype=np.float64) + b_ub = [6, 4] + x0_bounds = (-np.inf, np.inf) + x1_bounds = (-3, np.inf) + bounds = (x0_bounds, x1_bounds) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-80 / 7, desired_x=[-8 / 7, 18 / 7]) + + def test_bounds_equal_but_infeasible(self): + c = [-4, 1] + A_ub = [[7, -2], [0, 1], [2, -2]] + b_ub = [14, 0, 3] + bounds = [(2, 2), (0, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + def test_bounds_equal_but_infeasible2(self): + c = [-4, 1] + A_eq = [[7, -2], [0, 1], [2, -2]] + b_eq = [14, 0, 3] + bounds = [(2, 2), (0, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + def test_bounds_equal_no_presolve(self): + # There was a bug when a lower and upper bound were equal but + # presolve was not on to eliminate the variable. The bound + # was being converted to an equality constraint, but the bound + # was not eliminated, leading to issues in postprocessing. 
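+ # Here the second variable is fixed at 2 by its bounds, so the first + # constraint x0 + 2*x1 <= 4 forces x0 <= 0, contradicting x0 >= 1; + # the problem is infeasible with or without presolve.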
+ c = [1, 2] + A_ub = [[1, 2], [1.1, 2.2]] + b_ub = [4, 8] + bounds = [(1, 2), (2, 2)] + + o = {key: self.options[key] for key in self.options} + o["presolve"] = False + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + _assert_infeasible(res) + + def test_zero_column_1(self): + m, n = 3, 4 + np.random.seed(0) + c = np.random.rand(n) + c[1] = 1 + A_eq = np.random.rand(m, n) + A_eq[:, 1] = 0 + b_eq = np.random.rand(m) + A_ub = [[1, 0, 1, 1]] + b_ub = 3 + bounds = [(-10, 10), (-10, 10), (-10, None), (None, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-9.7087836730413404) + + def test_zero_column_2(self): + if self.method in {'highs-ds', 'highs-ipm'}: + # See upstream issue https://github.com/ERGO-Code/HiGHS/issues/648 + pytest.xfail() + + np.random.seed(0) + m, n = 2, 4 + c = np.random.rand(n) + c[1] = -1 + A_eq = np.random.rand(m, n) + A_eq[:, 1] = 0 + b_eq = np.random.rand(m) + + A_ub = np.random.rand(m, n) + A_ub[:, 1] = 0 + b_ub = np.random.rand(m) + bounds = (None, None) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + # Unboundedness detected in presolve + if self.options.get('presolve', True) and "highs" not in self.method: + # HiGHS detects unboundedness or infeasibility in presolve + # It needs an iteration of simplex to be sure of unboundedness + # Other solvers report that the problem is unbounded if feasible + assert_equal(res.nit, 0) + + def test_zero_row_1(self): + c = [1, 2, 3] + A_eq = [[0, 0, 0], [1, 1, 1], [0, 0, 0]] + b_eq = [0, 3, 0] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=3) + + def test_zero_row_2(self): + A_ub = [[0, 0, 0], [1, 1, 1], [0, 0, 0]] + b_ub = [0, 3, 0] + c = [1, 2, 3] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=0) + + def test_zero_row_3(self): + m, n = 2, 4 + c = np.random.rand(n) + A_eq = np.random.rand(m, n) + A_eq[0, :] = 0 + b_eq = np.random.rand(m) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + # Infeasibility detected in presolve + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_zero_row_4(self): + m, n = 2, 4 + c = np.random.rand(n) + A_ub = np.random.rand(m, n) + A_ub[0, :] = 0 + b_ub = -np.random.rand(m) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + # Infeasibility detected in presolve + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_singleton_row_eq_1(self): + c = [1, 1, 1, 2] + A_eq = [[1, 0, 0, 0], [0, 2, 0, 0], [1, 0, 0, 0], [1, 1, 1, 1]] + b_eq = [1, 2, 2, 4] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + # Infeasibility detected in presolve + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_singleton_row_eq_2(self): + c = [1, 1, 1, 2] + A_eq = [[1, 0, 0, 0], [0, 2, 0, 0], [1, 0, 0, 0], [1, 1, 1, 1]] + b_eq = [1, 2, 1, 4] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=4) + + def test_singleton_row_ub_1(self): + c = [1, 1, 1, 2] + A_ub = [[1, 0, 0, 0], [0, 2, 0, 0], [-1, 
0, 0, 0], [1, 1, 1, 1]] + b_ub = [1, 2, -2, 4] + bounds = [(None, None), (0, None), (0, None), (0, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + # Infeasibility detected in presolve + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_singleton_row_ub_2(self): + c = [1, 1, 1, 2] + A_ub = [[1, 0, 0, 0], [0, 2, 0, 0], [-1, 0, 0, 0], [1, 1, 1, 1]] + b_ub = [1, 2, -0.5, 4] + bounds = [(None, None), (0, None), (0, None), (0, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=0.5) + + def test_infeasible(self): + # Test linprog response to an infeasible problem + c = [-1, -1] + A_ub = [[1, 0], + [0, 1], + [-1, -1]] + b_ub = [2, 2, -5] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + def test_infeasible_inequality_bounds(self): + c = [1] + A_ub = [[2]] + b_ub = 4 + bounds = (5, 6) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + # Infeasibility detected in presolve + if self.options.get('presolve', True): + assert_equal(res.nit, 0) + + def test_unbounded(self): + # Test linprog response to an unbounded problem + c = np.array([1, 1]) * -1 # maximize + A_ub = [[-1, 1], + [-1, -1]] + b_ub = [-1, -2] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + + def test_unbounded_below_no_presolve_corrected(self): + c = [1] + bounds = [(None, 1)] + + o = {key: self.options[key] for key in self.options} + o["presolve"] = False + + res = linprog(c=c, bounds=bounds, + method=self.method, + options=o) + if self.method == "revised simplex": + # Revised simplex has a special pathway for no constraints. + assert_equal(res.status, 5) + else: + _assert_unbounded(res) + + def test_unbounded_no_nontrivial_constraints_1(self): + """ + Test whether presolve pathway for detecting unboundedness after + constraint elimination is working. + """ + c = np.array([0, 0, 0, 1, -1, -1]) + A_ub = np.array([[1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, -1]]) + b_ub = np.array([2, -2, 0]) + bounds = [(None, None), (None, None), (None, None), + (-1, 1), (-1, 1), (0, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + if not self.method.lower().startswith("highs"): + assert_equal(res.x[-1], np.inf) + assert_equal(res.message[:36], + "The problem is (trivially) unbounded") + + def test_unbounded_no_nontrivial_constraints_2(self): + """ + Test whether presolve pathway for detecting unboundedness after + constraint elimination is working. 
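+ With the nontrivial constraints eliminated, a variable with negative + cost and no upper bound remains, so the objective is unbounded below.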
+ """ + c = np.array([0, 0, 0, 1, -1, 1]) + A_ub = np.array([[1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1]]) + b_ub = np.array([2, -2, 0]) + bounds = [(None, None), (None, None), (None, None), + (-1, 1), (-1, 1), (None, 0)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + if not self.method.lower().startswith("highs"): + assert_equal(res.x[-1], -np.inf) + assert_equal(res.message[:36], + "The problem is (trivially) unbounded") + + def test_cyclic_recovery(self): + # Test linprogs recovery from cycling using the Klee-Minty problem + # Klee-Minty https://www.math.ubc.ca/~israel/m340/kleemin3.pdf + c = np.array([100, 10, 1]) * -1 # maximize + A_ub = [[1, 0, 0], + [20, 1, 0], + [200, 20, 1]] + b_ub = [1, 100, 10000] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[0, 0, 10000], atol=5e-6, rtol=1e-7) + + def test_cyclic_bland(self): + # Test the effect of Bland's rule on a cycling problem + c = np.array([-10, 57, 9, 24.]) + A_ub = np.array([[0.5, -5.5, -2.5, 9], + [0.5, -1.5, -0.5, 1], + [1, 0, 0, 0]]) + b_ub = [0, 0, 1] + + # copy the existing options dictionary but change maxiter + maxiter = 100 + o = {key: val for key, val in self.options.items()} + o['maxiter'] = maxiter + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + + if self.method == 'simplex' and not self.options.get('bland'): + # simplex cycles without Bland's rule + _assert_iteration_limit_reached(res, o['maxiter']) + else: + # other methods, including simplex with Bland's rule, succeed + _assert_success(res, desired_x=[1, 0, 1, 0]) + # note that revised simplex skips this test because it may or may not + # cycle depending on the initial basis + + def test_remove_redundancy_infeasibility(self): + # mostly a test of redundancy removal, which is carefully tested in + # test__remove_redundancy.py + m, n = 10, 10 + c = np.random.rand(n) + A_eq = np.random.rand(m, n) + b_eq = np.random.rand(m) + A_eq[-1, :] = 2 * A_eq[-2, :] + b_eq[-1] *= -1 + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + ################# + # General Tests # + ################# + + def test_nontrivial_problem(self): + # Problem involves all constraint types, + # negative resource limits, and rounding issues. + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=f_star, desired_x=x_star) + + def test_lpgen_problem(self): + # Test linprog with a rather large problem (400 variables, + # 40 constraints) generated by https://gist.github.com/denis-bz/8647461 + A_ub, b_ub, c = lpgen_2d(20, 20) + + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "Solving system with option 'sym_pos'") + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-64.049494229) + + def test_network_flow(self): + # A network flow problem with supply and demand at nodes + # and with costs along directed edges. 
+ # https://www.princeton.edu/~rvdb/542/lectures/lec10.pdf + c = [2, 4, 9, 11, 4, 3, 8, 7, 0, 15, 16, 18] + n, p = -1, 1 + A_eq = [ + [n, n, p, 0, p, 0, 0, 0, 0, p, 0, 0], + [p, 0, 0, p, 0, p, 0, 0, 0, 0, 0, 0], + [0, 0, n, n, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, p, p, 0, 0, p, 0], + [0, 0, 0, 0, n, n, n, 0, p, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, n, n, 0, 0, p], + [0, 0, 0, 0, 0, 0, 0, 0, 0, n, n, n]] + b_eq = [0, 19, -16, 33, 0, 0, -36] + with suppress_warnings() as sup: + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=755, atol=1e-6, rtol=1e-7) + + def test_network_flow_limited_capacity(self): + # A network flow problem with supply and demand at nodes + # and with costs and capacities along directed edges. + # http://blog.sommer-forst.de/2013/04/10/ + c = [2, 2, 1, 3, 1] + bounds = [ + [0, 4], + [0, 2], + [0, 2], + [0, 3], + [0, 5]] + n, p = -1, 1 + A_eq = [ + [n, n, 0, 0, 0], + [p, 0, n, n, 0], + [0, p, p, 0, n], + [0, 0, 0, p, p]] + b_eq = [-4, 0, 0, 4] + + with suppress_warnings() as sup: + # this is an UmfpackWarning but I had trouble importing it + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(RuntimeWarning, "scipy.linalg.solve\nIll...") + sup.filter(OptimizeWarning, "A_eq does not appear...") + sup.filter(OptimizeWarning, "Solving system with option...") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=14) + + def test_simplex_algorithm_wikipedia_example(self): + # https://en.wikipedia.org/wiki/Simplex_algorithm#Example + c = [-2, -3, -4] + A_ub = [ + [3, 2, 1], + [2, 5, 3]] + b_ub = [10, 15] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-20) + + def test_enzo_example(self): + # https://github.com/scipy/scipy/issues/1779 lp2.py + # + # Translated from Octave code at: + # http://www.ecs.shimane-u.ac.jp/~kyoshida/lpeng.htm + # and placed under MIT licence by Enzo Michelangeli + # with permission explicitly granted by the original author, + # Prof. 
Kazunobu Yoshida + c = [4, 8, 3, 0, 0, 0] + A_eq = [ + [2, 5, 3, -1, 0, 0], + [3, 2.5, 8, 0, -1, 0], + [8, 10, 4, 0, 0, -1]] + b_eq = [185, 155, 600] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=317.5, + desired_x=[66.25, 0, 17.5, 0, 183.75, 0], + atol=6e-6, rtol=1e-7) + + def test_enzo_example_b(self): + # rescued from https://github.com/scipy/scipy/pull/218 + c = [2.8, 6.3, 10.8, -2.8, -6.3, -10.8] + A_eq = [[-1, -1, -1, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [1, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 1]] + b_eq = [-0.5, 0.4, 0.3, 0.3, 0.3] + + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-1.77, + desired_x=[0.3, 0.2, 0.0, 0.0, 0.1, 0.3]) + + def test_enzo_example_c_with_degeneracy(self): + # rescued from https://github.com/scipy/scipy/pull/218 + m = 20 + c = -np.ones(m) + tmp = 2 * np.pi * np.arange(1, m + 1) / (m + 1) + A_eq = np.vstack((np.cos(tmp) - 1, np.sin(tmp))) + b_eq = [0, 0] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=0, desired_x=np.zeros(m)) + + def test_enzo_example_c_with_unboundedness(self): + # rescued from https://github.com/scipy/scipy/pull/218 + m = 50 + c = -np.ones(m) + tmp = 2 * np.pi * np.arange(m) / (m + 1) + # This test relies on `cos(0) -1 == sin(0)`, so ensure that's true + # (SIMD code or -ffast-math may cause spurious failures otherwise) + row0 = np.cos(tmp) - 1 + row0[0] = 0.0 + row1 = np.sin(tmp) + row1[0] = 0.0 + A_eq = np.vstack((row0, row1)) + b_eq = [0, 0] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_unbounded(res) + + def test_enzo_example_c_with_infeasibility(self): + # rescued from https://github.com/scipy/scipy/pull/218 + m = 50 + c = -np.ones(m) + tmp = 2 * np.pi * np.arange(m) / (m + 1) + A_eq = np.vstack((np.cos(tmp) - 1, np.sin(tmp))) + b_eq = [1, 1] + + o = {key: self.options[key] for key in self.options} + o["presolve"] = False + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + _assert_infeasible(res) + + def test_basic_artificial_vars(self): + # Problem is chosen to test two phase simplex methods when at the end + # of phase 1 some artificial variables remain in the basis. + # Also, for `method='simplex'`, the row in the tableau corresponding + # with the artificial variables is not all zero. 
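+ # Background: in two-phase simplex, phase 1 minimizes the sum of the + # artificial variables; artificials that are still basic (at value + # zero) when phase 1 ends must be pivoted out or otherwise handled + # before phase 2 optimizes the true objective.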
+ c = np.array([-0.1, -0.07, 0.004, 0.004, 0.004, 0.004]) + A_ub = np.array([[1.0, 0, 0, 0, 0, 0], [-1.0, 0, 0, 0, 0, 0], + [0, -1.0, 0, 0, 0, 0], [0, 1.0, 0, 0, 0, 0], + [1.0, 1.0, 0, 0, 0, 0]]) + b_ub = np.array([3.0, 3.0, 3.0, 3.0, 20.0]) + A_eq = np.array([[1.0, 0, -1, 1, -1, 1], [0, -1.0, -1, 1, -1, 1]]) + b_eq = np.array([0, 0]) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=0, desired_x=np.zeros_like(c), + atol=2e-6) + + def test_optimize_result(self): + # check all fields in OptimizeResult + c, A_ub, b_ub, A_eq, b_eq, bounds = very_random_gen(0) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, options=self.options) + assert_(res.success) + assert_(res.nit) + assert_(not res.status) + if 'highs' not in self.method: + # HiGHS status/message tested separately + assert_(res.message == "Optimization terminated successfully.") + assert_allclose(c @ res.x, res.fun) + assert_allclose(b_eq - A_eq @ res.x, res.con, atol=1e-11) + assert_allclose(b_ub - A_ub @ res.x, res.slack, atol=1e-11) + for key in ['eqlin', 'ineqlin', 'lower', 'upper']: + if key in res.keys(): + assert isinstance(res[key]['marginals'], np.ndarray) + assert isinstance(res[key]['residual'], np.ndarray) + + ################# + # Bug Fix Tests # + ################# + + def test_bug_5400(self): + # https://github.com/scipy/scipy/issues/5400 + bounds = [ + (0, None), + (0, 100), (0, 100), (0, 100), (0, 100), (0, 100), (0, 100), + (0, 900), (0, 900), (0, 900), (0, 900), (0, 900), (0, 900), + (0, None), (0, None), (0, None), (0, None), (0, None), (0, None)] + + f = 1 / 9 + g = -1e4 + h = -3.1 + A_ub = np.array([ + [1, -2.99, 0, 0, -3, 0, 0, 0, -1, -1, 0, -1, -1, 1, 1, 0, 0, 0, 0], + [1, 0, -2.9, h, 0, -3, 0, -1, 0, 0, -1, 0, -1, 0, 0, 1, 1, 0, 0], + [1, 0, 0, h, 0, 0, -3, -1, -1, 0, -1, -1, 0, 0, 0, 0, 0, 1, 1], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], + [0, 1.99, -1, -1, 0, 0, 0, -1, f, f, 0, 0, 0, g, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 2, -1, -1, 0, 0, 0, -1, f, f, 0, g, 0, 0, 0, 0], + [0, -1, 1.9, 2.1, 0, 0, 0, f, -1, -1, 0, 0, 0, 0, 0, g, 0, 0, 0], + [0, 0, 0, 0, -1, 2, -1, 0, 0, 0, f, -1, f, 0, 0, 0, g, 0, 0], + [0, -1, -1, 2.1, 0, 0, 0, f, f, -1, 0, 0, 0, 0, 0, 0, 0, g, 0], + [0, 0, 0, 0, -1, -1, 2, 0, 0, 0, f, f, -1, 0, 0, 0, 0, 0, g]]) + + b_ub = np.array([ + 0.0, 0, 0, 100, 100, 100, 100, 100, 100, 900, 900, 900, 900, 900, + 900, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0]) + + c = np.array([-1.0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]) + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, + "Solving system with option 'sym_pos'") + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=-106.63507541835018) + + def test_bug_6139(self): + # linprog(method='simplex') fails to find a basic feasible solution + # if phase 1 pseudo-objective function is outside the provided tol. + # https://github.com/scipy/scipy/issues/6139 + + # Note: This is not strictly a bug as the default tolerance determines + # if a result is "close enough" to zero and should not be expected + # to work for all cases. + + c = np.array([1, 1, 1]) + A_eq = np.array([[1., 0., 0.], [-1000., 0., - 1000.]]) + b_eq = np.array([5.00000000e+00, -1.00000000e+04]) + A_ub = -np.array([[0., 1000000., 1010000.]]) + b_ub = -np.array([10000000.]) + bounds = (None, None) + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + _assert_success(res, desired_fun=14.95, + desired_x=np.array([5, 4.95, 5])) + + def test_bug_6690(self): + # linprog simplex used to violate bound constraint despite reporting + # success. + # https://github.com/scipy/scipy/issues/6690 + + A_eq = np.array([[0, 0, 0, 0.93, 0, 0.65, 0, 0, 0.83, 0]]) + b_eq = np.array([0.9626]) + A_ub = np.array([ + [0, 0, 0, 1.18, 0, 0, 0, -0.2, 0, -0.22], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0.43, 0, 0, 0, 0, 0, 0], + [0, -1.22, -0.25, 0, 0, 0, -2.06, 0, 0, 1.37], + [0, 0, 0, 0, 0, 0, 0, -0.25, 0, 0] + ]) + b_ub = np.array([0.615, 0, 0.172, -0.869, -0.022]) + bounds = np.array([ + [-0.84, -0.97, 0.34, 0.4, -0.33, -0.74, 0.47, 0.09, -1.45, -0.73], + [0.37, 0.02, 2.86, 0.86, 1.18, 0.5, 1.76, 0.17, 0.32, -0.15] + ]).T + c = np.array([ + -1.64, 0.7, 1.8, -1.06, -1.16, 0.26, 2.13, 1.53, 0.66, 0.28 + ]) + + with suppress_warnings() as sup: + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(OptimizeWarning, + "Solving system with option 'cholesky'") + sup.filter(OptimizeWarning, "Solving system with option 'sym_pos'") + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + desired_fun = -1.19099999999 + desired_x = np.array([0.3700, -0.9700, 0.3400, 0.4000, 1.1800, + 0.5000, 0.4700, 0.0900, 0.3200, -0.7300]) + _assert_success(res, desired_fun=desired_fun, desired_x=desired_x) + + # Add small tol value to ensure arrays are less than or equal. + atol = 1e-6 + assert_array_less(bounds[:, 0] - atol, res.x) + assert_array_less(res.x, bounds[:, 1] + atol) + + def test_bug_7044(self): + # linprog simplex failed to "identify correct constraints" (?) + # leading to a non-optimal solution if A is rank-deficient. 
+ # https://github.com/scipy/scipy/issues/7044 + + A_eq, b_eq, c, _, _ = magic_square(3) + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + desired_fun = 1.730550597 + _assert_success(res, desired_fun=desired_fun) + assert_allclose(A_eq.dot(res.x), b_eq) + assert_array_less(np.zeros(res.x.size) - 1e-5, res.x) + + def test_bug_7237(self): + # https://github.com/scipy/scipy/issues/7237 + # linprog simplex "explodes" when the pivot value is very + # close to zero. + + c = np.array([-1, 0, 0, 0, 0, 0, 0, 0, 0]) + A_ub = np.array([ + [1., -724., 911., -551., -555., -896., 478., -80., -293.], + [1., 566., 42., 937., 233., 883., 392., -909., 57.], + [1., -208., -894., 539., 321., 532., -924., 942., 55.], + [1., 857., -859., 83., 462., -265., -971., 826., 482.], + [1., 314., -424., 245., -424., 194., -443., -104., -429.], + [1., 540., 679., 361., 149., -827., 876., 633., 302.], + [0., -1., -0., -0., -0., -0., -0., -0., -0.], + [0., -0., -1., -0., -0., -0., -0., -0., -0.], + [0., -0., -0., -1., -0., -0., -0., -0., -0.], + [0., -0., -0., -0., -1., -0., -0., -0., -0.], + [0., -0., -0., -0., -0., -1., -0., -0., -0.], + [0., -0., -0., -0., -0., -0., -1., -0., -0.], + [0., -0., -0., -0., -0., -0., -0., -1., -0.], + [0., -0., -0., -0., -0., -0., -0., -0., -1.], + [0., 1., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 1., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 1., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 1., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 1., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 1., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 1.] + ]) + b_ub = np.array([ + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.]) + A_eq = np.array([[0., 1., 1., 1., 1., 1., 1., 1., 1.]]) + b_eq = np.array([[1.]]) + bounds = [(None, None)] * 9 + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=108.568535, atol=1e-6) + + def test_bug_8174(self): + # https://github.com/scipy/scipy/issues/8174 + # The simplex method sometimes "explodes" if the pivot value is very + # close to zero. + A_ub = np.array([ + [22714, 1008, 13380, -2713.5, -1116], + [-4986, -1092, -31220, 17386.5, 684], + [-4986, 0, 0, -2713.5, 0], + [22714, 0, 0, 17386.5, 0]]) + b_ub = np.zeros(A_ub.shape[0]) + c = -np.ones(A_ub.shape[1]) + bounds = [(0, 1)] * A_ub.shape[1] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + if self.options.get('tol', 1e-9) < 1e-10 and self.method == 'simplex': + _assert_unable_to_find_basic_feasible_sol(res) + else: + _assert_success(res, desired_fun=-2.0080717488789235, atol=1e-6) + + def test_bug_8174_2(self): + # Test supplementary example from issue 8174. 
+ # https://github.com/scipy/scipy/issues/8174 + # https://stackoverflow.com/questions/47717012/linprog-in-scipy-optimize-checking-solution + c = np.array([1, 0, 0, 0, 0, 0, 0]) + A_ub = -np.identity(7) + b_ub = np.array([[-2], [-2], [-2], [-2], [-2], [-2], [-2]]) + A_eq = np.array([ + [1, 1, 1, 1, 1, 1, 0], + [0.3, 1.3, 0.9, 0, 0, 0, -1], + [0.3, 0, 0, 0, 0, 0, -2/3], + [0, 0.65, 0, 0, 0, 0, -1/15], + [0, 0, 0.3, 0, 0, 0, -1/15] + ]) + b_eq = np.array([[100], [0], [0], [0], [0]]) + + with suppress_warnings() as sup: + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(OptimizeWarning, "A_eq does not appear...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_fun=43.3333333331385) + + def test_bug_8561(self): + # Test that pivot row is chosen correctly when using Bland's rule + # This was originally written for the simplex method with + # Bland's rule only, but it doesn't hurt to test all methods/options + # https://github.com/scipy/scipy/issues/8561 + c = np.array([7, 0, -4, 1.5, 1.5]) + A_ub = np.array([ + [4, 5.5, 1.5, 1.0, -3.5], + [1, -2.5, -2, 2.5, 0.5], + [3, -0.5, 4, -12.5, -7], + [-1, 4.5, 2, -3.5, -2], + [5.5, 2, -4.5, -1, 9.5]]) + b_ub = np.array([0, 0, 0, 0, 1]) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, options=self.options, + method=self.method) + _assert_success(res, desired_x=[0, 0, 19, 16/3, 29/3]) + + def test_bug_8662(self): + # linprog simplex used to report incorrect optimal results + # https://github.com/scipy/scipy/issues/8662 + c = [-10, 10, 6, 3] + A_ub = [[8, -8, -4, 6], + [-8, 8, 4, -6], + [-4, 4, 8, -4], + [3, -3, -3, -10]] + b_ub = [9, -9, -9, -4] + bounds = [(0, None), (0, None), (0, None), (0, None)] + desired_fun = 36.0000000000 + + with suppress_warnings() as sup: + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res1 = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + + # Set boundary condition as a constraint + A_ub.append([0, 0, -1, 0]) + b_ub.append(0) + bounds[2] = (None, None) + + with suppress_warnings() as sup: + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res2 = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + rtol = 1e-5 + _assert_success(res1, desired_fun=desired_fun, rtol=rtol) + _assert_success(res2, desired_fun=desired_fun, rtol=rtol) + + def test_bug_8663(self): + # exposed a bug in presolve + # https://github.com/scipy/scipy/issues/8663 + c = [1, 5] + A_eq = [[0, -7]] + b_eq = [-6] + bounds = [(0, None), (None, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[0, 6./7], desired_fun=5*6./7) + + def test_bug_8664(self): + # interior-point has trouble with this when presolve is off + # tested for interior-point with presolve off in TestLinprogIPSpecific + # https://github.com/scipy/scipy/issues/8664 + c = [4] + A_ub = [[2], [5]] + b_ub = [4, 4] + A_eq = [[0], [-8], [9]] + b_eq = [3, 2, 10] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + sup.filter(OptimizeWarning, "Solving system with option...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_infeasible(res) + + def test_bug_8973(self): + """ + Test whether bug described at: + 
https://github.com/scipy/scipy/issues/8973 + was fixed. + """ + c = np.array([0, 0, 0, 1, -1]) + A_ub = np.array([[1, 0, 0, 0, 0], [0, 1, 0, 0, 0]]) + b_ub = np.array([2, -2]) + bounds = [(None, None), (None, None), (None, None), (-1, 1), (-1, 1)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + # solution vector x is not unique + _assert_success(res, desired_fun=-2) + # HiGHS IPM had an issue where the following wasn't true! + assert_equal(c @ res.x, res.fun) + + def test_bug_8973_2(self): + """ + Additional test for: + https://github.com/scipy/scipy/issues/8973 + suggested in + https://github.com/scipy/scipy/pull/8985 + review by @antonior92 + """ + c = np.zeros(1) + A_ub = np.array([[1]]) + b_ub = np.array([-2]) + bounds = (None, None) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[-2], desired_fun=0) + + def test_bug_10124(self): + """ + Test the problem from the linprog docstring: + 'disp'=True caused revised simplex failure. + """ + c = [-1, 4] + A_ub = [[-3, 1], [1, 2]] + b_ub = [6, 4] + bounds = [(None, None), (-3, None)] + o = {"disp": True} + o.update(self.options) + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + _assert_success(res, desired_x=[10, -3], desired_fun=-22) + + def test_bug_10349(self): + """ + Test for redundancy removal tolerance issue + https://github.com/scipy/scipy/issues/10349 + """ + A_eq = np.array([[1, 1, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0], + [0, 0, 0, 0, 1, 1], + [1, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 0], + [0, 1, 0, 0, 0, 1]]) + b_eq = np.array([221, 210, 10, 141, 198, 102]) + c = np.concatenate((0, 1, np.zeros(4)), axis=None) + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options) + _assert_success(res, desired_x=[129, 92, 12, 198, 0, 10], desired_fun=92) + + @pytest.mark.skipif(sys.platform == 'darwin', + reason=("Failing on some local macOS builds, " + "see gh-13846")) + def test_bug_10466(self): + """ + Test that autoscale fixes poorly-scaled problem + """ + c = [-8., -0., -8., -0., -8., -0., -0., -0., -0., -0., -0., -0., -0.] 
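+ # Note the scaling below: the b_eq entries are on the order of + # 1e8-1e9 while the cost coefficients are O(1). The autoscale option + # applies equilibration so the legacy solvers can cope with this.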
+ A_eq = [[1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.], + [1., 0., 1., 0., 1., 0., -1., 0., 0., 0., 0., 0., 0.], + [1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], + [1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.], + [0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0.], + [0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.]] + + b_eq = [3.14572800e+08, 4.19430400e+08, 5.24288000e+08, + 1.00663296e+09, 1.07374182e+09, 1.07374182e+09, + 1.07374182e+09, 1.07374182e+09, 1.07374182e+09, + 1.07374182e+09] + + o = {} + # HiGHS methods don't use autoscale option + if not self.method.startswith("highs"): + o = {"autoscale": True} + o.update(self.options) + + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "Solving system with option...") + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(RuntimeWarning, "scipy.linalg.solve\nIll...") + sup.filter(RuntimeWarning, "divide by zero encountered...") + sup.filter(RuntimeWarning, "overflow encountered...") + sup.filter(RuntimeWarning, "invalid value encountered...") + sup.filter(LinAlgWarning, "Ill-conditioned matrix...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + assert_allclose(res.fun, -8589934560) + + def test_bug_20584(self): + """ + Test that when integrality is a list of all zeros, linprog gives the + same result as when it is an array of all zeros / integrality=None + """ + c = [1, 1] + A_ub = [[-1, 0]] + b_ub = [-2.5] + res1 = linprog(c, A_ub=A_ub, b_ub=b_ub, integrality=[0, 0]) + res2 = linprog(c, A_ub=A_ub, b_ub=b_ub, integrality=np.asarray([0, 0])) + res3 = linprog(c, A_ub=A_ub, b_ub=b_ub, integrality=None) + assert_equal(res1.x, res2.x) + assert_equal(res1.x, res3.x) + + +######################### +# Method-specific Tests # +######################### + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class LinprogSimplexTests(LinprogCommonTests): + method = "simplex" + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class LinprogIPTests(LinprogCommonTests): + method = "interior-point" + + def test_bug_10466(self): + pytest.skip("Test is failing, but solver is deprecated.") + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class LinprogRSTests(LinprogCommonTests): + method = "revised simplex" + + # Revised simplex does not reliably solve these problems. + # Failure is intermittent due to the random choice of elements to complete + # the basis after phase 1 terminates. In any case, linprog exits + # gracefully, reporting numerical difficulties. I do not think this should + # prevent revised simplex from being merged, as it solves the problems + # most of the time and solves a broader range of problems than the existing + # simplex implementation. + # I believe that the root cause is the same for all three and that this + # same issue prevents revised simplex from solving many other problems + # reliably. Somehow the pivoting rule allows the algorithm to pivot into + # a singular basis. I haven't been able to find a reference that + # acknowledges this possibility, suggesting that there is a bug. On the + # other hand, the pivoting rule is quite simple, and I can't find a + # mistake, which suggests that this is a possibility with the pivoting + # rule. 
Hopefully, a better pivoting rule will fix the issue. + + def test_bug_5400(self): + pytest.skip("Intermittent failure acceptable.") + + def test_bug_8662(self): + pytest.skip("Intermittent failure acceptable.") + + def test_network_flow(self): + pytest.skip("Intermittent failure acceptable.") + + +class LinprogHiGHSTests(LinprogCommonTests): + def test_callback(self): + # this is the problem from test_callback + def cb(res): + return None + c = np.array([-3, -2]) + A_ub = [[2, 1], [1, 1], [1, 0]] + b_ub = [10, 8, 4] + assert_raises(NotImplementedError, linprog, c, A_ub=A_ub, b_ub=b_ub, + callback=cb, method=self.method) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, method=self.method) + _assert_success(res, desired_fun=-18.0, desired_x=[2, 6]) + + @pytest.mark.parametrize("options", + [{"maxiter": -1}, + {"disp": -1}, + {"presolve": -1}, + {"time_limit": -1}, + {"dual_feasibility_tolerance": -1}, + {"primal_feasibility_tolerance": -1}, + {"ipm_optimality_tolerance": -1}, + {"simplex_dual_edge_weight_strategy": "ekki"}, + ]) + def test_invalid_option_values(self, options): + def f(options): + linprog(1, method=self.method, options=options) + options.update(self.options) + assert_warns(OptimizeWarning, f, options=options) + + def test_crossover(self): + A_eq, b_eq, c, _, _ = magic_square(4) + bounds = (0, 1) + res = linprog(c, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, options=self.options) + # there should be nonzero crossover iterations for IPM (only) + assert_equal(res.crossover_nit == 0, self.method != "highs-ipm") + + @pytest.mark.fail_slow(5) + def test_marginals(self): + # Ensure lagrange multipliers are correct by comparing the derivative + # w.r.t. b_ub/b_eq/ub/lb to the reported duals. + c, A_ub, b_ub, A_eq, b_eq, bounds = very_random_gen(seed=0) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, options=self.options) + lb, ub = bounds.T + + # sensitivity w.r.t. b_ub + def f_bub(x): + return linprog(c, A_ub, x, A_eq, b_eq, bounds, + method=self.method).fun + + dfdbub = approx_derivative(f_bub, b_ub, method='3-point', f0=res.fun) + assert_allclose(res.ineqlin.marginals, dfdbub) + + # sensitivity w.r.t. b_eq + def f_beq(x): + return linprog(c, A_ub, b_ub, A_eq, x, bounds, + method=self.method).fun + + dfdbeq = approx_derivative(f_beq, b_eq, method='3-point', f0=res.fun) + assert_allclose(res.eqlin.marginals, dfdbeq) + + # sensitivity w.r.t. lb + def f_lb(x): + bounds = np.array([x, ub]).T + return linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method).fun + + with np.errstate(invalid='ignore'): + # approx_derivative has trouble where lb is infinite + dfdlb = approx_derivative(f_lb, lb, method='3-point', f0=res.fun) + dfdlb[~np.isfinite(lb)] = 0 + + assert_allclose(res.lower.marginals, dfdlb) + + # sensitivity w.r.t. 
ub + def f_ub(x): + bounds = np.array([lb, x]).T + return linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method).fun + + with np.errstate(invalid='ignore'): + dfdub = approx_derivative(f_ub, ub, method='3-point', f0=res.fun) + dfdub[~np.isfinite(ub)] = 0 + + assert_allclose(res.upper.marginals, dfdub) + + def test_dual_feasibility(self): + # Ensure solution is dual feasible using marginals + c, A_ub, b_ub, A_eq, b_eq, bounds = very_random_gen(seed=42) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, options=self.options) + + # KKT dual feasibility equation from Theorem 1 from + # http://www.personal.psu.edu/cxg286/LPKKT.pdf + resid = (-c + A_ub.T @ res.ineqlin.marginals + + A_eq.T @ res.eqlin.marginals + + res.upper.marginals + + res.lower.marginals) + assert_allclose(resid, 0, atol=1e-12) + + def test_complementary_slackness(self): + # Ensure that the complementary slackness condition is satisfied. + c, A_ub, b_ub, A_eq, b_eq, bounds = very_random_gen(seed=42) + res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, options=self.options) + + # KKT complementary slackness equation from Theorem 1 from + # http://www.personal.psu.edu/cxg286/LPKKT.pdf modified for + # non-zero RHS + assert np.allclose(res.ineqlin.marginals @ (b_ub - A_ub @ res.x), 0) + + +################################ +# Simplex Option-Specific Tests# +################################ + + +class TestLinprogSimplexDefault(LinprogSimplexTests): + + def setup_method(self): + self.options = {} + + def test_bug_5400(self): + pytest.skip("Simplex fails on this problem.") + + def test_bug_7237_low_tol(self): + # Fails if the tolerance is too strict. Here, we test that + # even if the solution is wrong, the appropriate error is raised. + pytest.skip("Simplex fails on this problem.") + + def test_bug_8174_low_tol(self): + # Fails if the tolerance is too strict. Here, we test that + # even if the solution is wrong, the appropriate warning is issued. + self.options.update({'tol': 1e-12}) + with pytest.warns(OptimizeWarning): + super().test_bug_8174() + + +class TestLinprogSimplexBland(LinprogSimplexTests): + + def setup_method(self): + self.options = {'bland': True} + + def test_bug_5400(self): + pytest.skip("Simplex fails on this problem.") + + def test_bug_8174_low_tol(self): + # Fails if the tolerance is too strict. Here, we test that + # even if the solution is wrong, the appropriate error is raised. + self.options.update({'tol': 1e-12}) + with pytest.raises(AssertionError): + with pytest.warns(OptimizeWarning): + super().test_bug_8174() + + +class TestLinprogSimplexNoPresolve(LinprogSimplexTests): + + def setup_method(self): + self.options = {'presolve': False} + + is_32_bit = np.intp(0).itemsize < 8 + is_linux = sys.platform.startswith('linux') + + @pytest.mark.xfail( + condition=is_32_bit and is_linux, + reason='Fails with warning on 32-bit linux') + def test_bug_5400(self): + super().test_bug_5400() + + def test_bug_6139_low_tol(self): + # Linprog(method='simplex') fails to find a basic feasible solution + # if phase 1 pseudo-objective function is outside the provided tol. + # https://github.com/scipy/scipy/issues/6139 + # Without ``presolve`` eliminating such rows the result is incorrect. 
+ self.options.update({'tol': 1e-12}) + with pytest.raises(AssertionError, match='linprog status 4'): + return super().test_bug_6139() + + def test_bug_7237_low_tol(self): + pytest.skip("Simplex fails on this problem.") + + def test_bug_8174_low_tol(self): + # Fails if the tolerance is too strict. Here, we test that + # even if the solution is wrong, the appropriate warning is issued. + self.options.update({'tol': 1e-12}) + with pytest.warns(OptimizeWarning): + super().test_bug_8174() + + def test_unbounded_no_nontrivial_constraints_1(self): + pytest.skip("Tests behavior specific to presolve") + + def test_unbounded_no_nontrivial_constraints_2(self): + pytest.skip("Tests behavior specific to presolve") + + +####################################### +# Interior-Point Option-Specific Tests# +####################################### + + +class TestLinprogIPDense(LinprogIPTests): + options = {"sparse": False} + + # see https://github.com/scipy/scipy/issues/20216 for skip reason + @pytest.mark.skipif( + sys.platform == 'darwin', + reason="Fails on some macOS builds for reason not relevant to test" + ) + def test_bug_6139(self): + super().test_bug_6139() + +if has_cholmod: + class TestLinprogIPSparseCholmod(LinprogIPTests): + options = {"sparse": True, "cholesky": True} + + +if has_umfpack: + class TestLinprogIPSparseUmfpack(LinprogIPTests): + options = {"sparse": True, "cholesky": False} + + def test_network_flow_limited_capacity(self): + pytest.skip("Failing due to numerical issues on some platforms.") + + +class TestLinprogIPSparse(LinprogIPTests): + options = {"sparse": True, "cholesky": False, "sym_pos": False} + + @pytest.mark.skipif( + sys.platform == 'darwin', + reason="Fails on macOS x86 Accelerate builds (gh-20510)" + ) + @pytest.mark.xfail_on_32bit("This test is sensitive to machine epsilon level " + "perturbations in linear system solution in " + "_linprog_ip._sym_solve.") + def test_bug_6139(self): + super().test_bug_6139() + + @pytest.mark.xfail(reason='Fails with ATLAS, see gh-7877') + def test_bug_6690(self): + # Test defined in base class, but can't mark as xfail there + super().test_bug_6690() + + def test_magic_square_sparse_no_presolve(self): + # test linprog with a problem with a rank-deficient A_eq matrix + A_eq, b_eq, c, _, _ = magic_square(3) + bounds = (0, 1) + + with suppress_warnings() as sup: + if has_umfpack: + sup.filter(UmfpackWarning) + sup.filter(MatrixRankWarning, "Matrix is exactly singular") + sup.filter(OptimizeWarning, "Solving system with option...") + + o = {key: self.options[key] for key in self.options} + o["presolve"] = False + + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + _assert_success(res, desired_fun=1.730550597) + + def test_sparse_solve_options(self): + # checking that problem is solved with all column permutation options + A_eq, b_eq, c, _, _ = magic_square(3) + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + sup.filter(OptimizeWarning, "Invalid permc_spec option") + o = {key: self.options[key] for key in self.options} + permc_specs = ('NATURAL', 'MMD_ATA', 'MMD_AT_PLUS_A', + 'COLAMD', 'ekki-ekki-ekki') + # 'ekki-ekki-ekki' raises warning about invalid permc_spec option + # and uses default + for permc_spec in permc_specs: + o["permc_spec"] = permc_spec + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=o) + _assert_success(res, desired_fun=1.730550597) + + +class TestLinprogIPSparsePresolve(LinprogIPTests): + options = {"sparse": True, 
"_sparse_presolve": True} + + @pytest.mark.skipif( + sys.platform == 'darwin', + reason="Fails on macOS x86 Accelerate builds (gh-20510)" + ) + @pytest.mark.xfail_on_32bit("This test is sensitive to machine epsilon level " + "perturbations in linear system solution in " + "_linprog_ip._sym_solve.") + def test_bug_6139(self): + super().test_bug_6139() + + def test_enzo_example_c_with_infeasibility(self): + pytest.skip('_sparse_presolve=True incompatible with presolve=False') + + @pytest.mark.xfail(reason='Fails with ATLAS, see gh-7877') + def test_bug_6690(self): + # Test defined in base class, but can't mark as xfail there + super().test_bug_6690() + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class TestLinprogIPSpecific: + method = "interior-point" + # the following tests don't need to be performed separately for + # sparse presolve, sparse after presolve, and dense + + def test_solver_select(self): + # check that default solver is selected as expected + if has_cholmod: + options = {'sparse': True, 'cholesky': True} + elif has_umfpack: + options = {'sparse': True, 'cholesky': False} + else: + options = {'sparse': True, 'cholesky': False, 'sym_pos': False} + A, b, c = lpgen_2d(20, 20) + res1 = linprog(c, A_ub=A, b_ub=b, method=self.method, options=options) + res2 = linprog(c, A_ub=A, b_ub=b, method=self.method) # default solver + assert_allclose(res1.fun, res2.fun, + err_msg="linprog default solver unexpected result", + rtol=2e-15, atol=1e-15) + + def test_unbounded_below_no_presolve_original(self): + # formerly caused segfault in TravisCI w/ "cholesky":True + c = [-1] + bounds = [(None, 1)] + res = linprog(c=c, bounds=bounds, + method=self.method, + options={"presolve": False, "cholesky": True}) + _assert_success(res, desired_fun=-1) + + def test_cholesky(self): + # use cholesky factorization and triangular solves + A, b, c = lpgen_2d(20, 20) + res = linprog(c, A_ub=A, b_ub=b, method=self.method, + options={"cholesky": True}) # only for dense + _assert_success(res, desired_fun=-64.049494229) + + def test_alternate_initial_point(self): + # use "improved" initial point + A, b, c = lpgen_2d(20, 20) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "scipy.linalg.solve\nIll...") + sup.filter(OptimizeWarning, "Solving system with option...") + sup.filter(LinAlgWarning, "Ill-conditioned matrix...") + res = linprog(c, A_ub=A, b_ub=b, method=self.method, + options={"ip": True, "disp": True}) + # ip code is independent of sparse/dense + _assert_success(res, desired_fun=-64.049494229) + + def test_bug_8664(self): + # interior-point has trouble with this when presolve is off + c = [4] + A_ub = [[2], [5]] + b_ub = [4, 4] + A_eq = [[0], [-8], [9]] + b_eq = [3, 2, 10] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + sup.filter(OptimizeWarning, "Solving system with option...") + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options={"presolve": False}) + assert_(not res.success, "Incorrectly reported success") + + +######################################## +# Revised Simplex Option-Specific Tests# +######################################## + + +class TestLinprogRSCommon(LinprogRSTests): + options = {} + + def test_cyclic_bland(self): + pytest.skip("Intermittent failure acceptable.") + + def test_nontrivial_problem_with_guess(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options, x0=x_star) + _assert_success(res, 
desired_fun=f_star, desired_x=x_star) + assert_equal(res.nit, 0) + + def test_nontrivial_problem_with_unbounded_variables(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + bounds = [(None, None), (None, None), (0, None), (None, None)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options, x0=x_star) + _assert_success(res, desired_fun=f_star, desired_x=x_star) + assert_equal(res.nit, 0) + + def test_nontrivial_problem_with_bounded_variables(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + bounds = [(None, 1), (1, None), (0, None), (.4, .6)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options, x0=x_star) + _assert_success(res, desired_fun=f_star, desired_x=x_star) + assert_equal(res.nit, 0) + + def test_nontrivial_problem_with_negative_unbounded_variable(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + b_eq = [4] + x_star = np.array([-219/385, 582/385, 0, 4/10]) + f_star = 3951/385 + bounds = [(None, None), (1, None), (0, None), (.4, .6)] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options, x0=x_star) + _assert_success(res, desired_fun=f_star, desired_x=x_star) + assert_equal(res.nit, 0) + + def test_nontrivial_problem_with_bad_guess(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + bad_guess = [1, 2, 3, .5] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds, + method=self.method, options=self.options, x0=bad_guess) + assert_equal(res.status, 6) + + def test_redundant_constraints_with_guess(self): + A, b, c, _, _ = magic_square(3) + p = np.random.rand(*c.shape) + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, "A_eq does not appear...") + sup.filter(RuntimeWarning, "invalid value encountered") + sup.filter(LinAlgWarning) + res = linprog(c, A_eq=A, b_eq=b, method=self.method) + res2 = linprog(c, A_eq=A, b_eq=b, method=self.method, x0=res.x) + res3 = linprog(c + p, A_eq=A, b_eq=b, method=self.method, x0=res.x) + _assert_success(res2, desired_fun=1.730550597) + assert_equal(res2.nit, 0) + _assert_success(res3) + assert_(res3.nit < res.nit) # hot start reduces iterations + + +class TestLinprogRSBland(LinprogRSTests): + options = {"pivot": "bland"} + + +############################################ +# HiGHS-Simplex-Dual Option-Specific Tests # +############################################ + + +class TestLinprogHiGHSSimplexDual(LinprogHiGHSTests): + method = "highs-ds" + options = {} + + def test_lad_regression(self): + ''' + The scaled model should be optimal, i.e. it should not report the + unscaled model as infeasible. See + https://github.com/ERGO-Code/HiGHS/issues/494. 
+ ''' + # Test to ensure gh-13610 is resolved (mismatch between HiGHS scaled + # and unscaled model statuses) + c, A_ub, b_ub, bnds = l1_regression_prob() + res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bnds, + method=self.method, options=self.options) + assert_equal(res.status, 0) + assert_(res.x is not None) + assert_(np.all(res.slack > -1e-6)) + assert_(np.all(res.x <= [np.inf if ub is None else ub + for lb, ub in bnds])) + assert_(np.all(res.x >= [-np.inf if lb is None else lb - 1e-7 + for lb, ub in bnds])) + + +################################### +# HiGHS-IPM Option-Specific Tests # +################################### + + +class TestLinprogHiGHSIPM(LinprogHiGHSTests): + method = "highs-ipm" + options = {} + + +################################### +# HiGHS-MIP Option-Specific Tests # +################################### + + +class TestLinprogHiGHSMIP: + method = "highs" + options = {} + + @pytest.mark.fail_slow(5) + @pytest.mark.xfail(condition=(sys.maxsize < 2 ** 32 and + platform.system() == "Linux"), + run=False, + reason="gh-16347") + def test_mip1(self): + # solve non-relaxed magic square problem (finally!) + # also check that values are all integers - they don't always + # come out of HiGHS that way + n = 4 + A, b, c, numbers, M = magic_square(n) + bounds = [(0, 1)] * len(c) + integrality = [1] * len(c) + + res = linprog(c=c*0, A_eq=A, b_eq=b, bounds=bounds, + method=self.method, integrality=integrality) + + s = (numbers.flatten() * res.x).reshape(n**2, n, n) + square = np.sum(s, axis=0) + np.testing.assert_allclose(square.sum(axis=0), M) + np.testing.assert_allclose(square.sum(axis=1), M) + np.testing.assert_allclose(np.diag(square).sum(), M) + np.testing.assert_allclose(np.diag(square[:, ::-1]).sum(), M) + + np.testing.assert_allclose(res.x, np.round(res.x), atol=1e-12) + + def test_mip2(self): + # solve MIP with inequality constraints and all integer constraints + # source: slide 5, + # https://www.cs.upc.edu/~erodri/webpage/cps/theory/lp/milp/slides.pdf + + # use all array inputs to test gh-16681 (integrality couldn't be array) + A_ub = np.array([[2, -2], [-8, 10]]) + b_ub = np.array([-1, 13]) + c = -np.array([1, 1]) + + bounds = np.array([(0, np.inf)] * len(c)) + integrality = np.ones_like(c) + + res = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, + method=self.method, integrality=integrality) + + np.testing.assert_allclose(res.x, [1, 2]) + np.testing.assert_allclose(res.fun, -3) + + def test_mip3(self): + # solve MIP with inequality constraints and all integer constraints + # source: https://en.wikipedia.org/wiki/Integer_programming#Example + A_ub = np.array([[-1, 1], [3, 2], [2, 3]]) + b_ub = np.array([1, 12, 12]) + c = -np.array([0, 1]) + + bounds = [(0, np.inf)] * len(c) + integrality = [1] * len(c) + + res = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, + method=self.method, integrality=integrality) + + np.testing.assert_allclose(res.fun, -2) + # two optimal solutions possible, just need one of them + assert np.allclose(res.x, [1, 2]) or np.allclose(res.x, [2, 2]) + + def test_mip4(self): + # solve MIP with inequality constraints and only one integer constraint + # source: https://www.mathworks.com/help/optim/ug/intlinprog.html + A_ub = np.array([[-1, -2], [-4, -1], [2, 1]]) + b_ub = np.array([14, -33, 20]) + c = np.array([8, 1]) + + bounds = [(0, np.inf)] * len(c) + integrality = [0, 1] + + res = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, + method=self.method, integrality=integrality) + + np.testing.assert_allclose(res.x, [6.5, 7]) + 
np.testing.assert_allclose(res.fun, 59) + + def test_mip5(self): + # solve MIP with inequality and equality constraints + # source: https://www.mathworks.com/help/optim/ug/intlinprog.html + A_ub = np.array([[1, 1, 1]]) + b_ub = np.array([7]) + A_eq = np.array([[4, 2, 1]]) + b_eq = np.array([12]) + c = np.array([-3, -2, -1]) + + bounds = [(0, np.inf), (0, np.inf), (0, 1)] + integrality = [0, 1, 0] + + res = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, + integrality=integrality) + + np.testing.assert_allclose(res.x, [0, 6, 0]) + np.testing.assert_allclose(res.fun, -12) + + # gh-16897: these fields were not present, ensure that they are now + assert res.get("mip_node_count", None) is not None + assert res.get("mip_dual_bound", None) is not None + assert res.get("mip_gap", None) is not None + + @pytest.mark.slow + @pytest.mark.timeout(120) # prerelease_deps_coverage_64bit_blas job + def test_mip6(self): + # solve a larger MIP with only equality constraints + # source: https://www.mathworks.com/help/optim/ug/intlinprog.html + A_eq = np.array([[22, 13, 26, 33, 21, 3, 14, 26], + [39, 16, 22, 28, 26, 30, 23, 24], + [18, 14, 29, 27, 30, 38, 26, 26], + [41, 26, 28, 36, 18, 38, 16, 26]]) + b_eq = np.array([7872, 10466, 11322, 12058]) + c = np.array([2, 10, 13, 17, 7, 5, 7, 3]) + + bounds = [(0, np.inf)]*8 + integrality = [1]*8 + + res = linprog(c=c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, + method=self.method, integrality=integrality) + + np.testing.assert_allclose(res.fun, 1854) + + @pytest.mark.xslow + def test_mip_rel_gap_passdown(self): + # MIP taken from test_mip6, solved with different values of mip_rel_gap + # solve a larger MIP with only equality constraints + # source: https://www.mathworks.com/help/optim/ug/intlinprog.html + A_eq = np.array([[22, 13, 26, 33, 21, 3, 14, 26], + [39, 16, 22, 28, 26, 30, 23, 24], + [18, 14, 29, 27, 30, 38, 26, 26], + [41, 26, 28, 36, 18, 38, 16, 26]]) + b_eq = np.array([7872, 10466, 11322, 12058]) + c = np.array([2, 10, 13, 17, 7, 5, 7, 3]) + + bounds = [(0, np.inf)]*8 + integrality = [1]*8 + + mip_rel_gaps = [0.5, 0.25, 0.01, 0.001] + sol_mip_gaps = [] + for mip_rel_gap in mip_rel_gaps: + res = linprog(c=c, A_eq=A_eq, b_eq=b_eq, + bounds=bounds, method=self.method, + integrality=integrality, + options={"mip_rel_gap": mip_rel_gap}) + final_mip_gap = res["mip_gap"] + # assert that the solution actually has mip_gap lower than the + # required mip_rel_gap supplied + assert final_mip_gap <= mip_rel_gap + sol_mip_gaps.append(final_mip_gap) + + # make sure that the mip_rel_gap parameter is actually doing something + # check that differences between solution gaps are declining + # monotonically with the mip_rel_gap parameter. np.diff does + # x[i+1] - x[i], so flip the array before differencing to get + # what should be a positive, monotone decreasing series of solution + # gaps + gap_diffs = np.diff(np.flip(sol_mip_gaps)) + assert np.all(gap_diffs >= 0) + assert not np.all(gap_diffs == 0) + + def test_semi_continuous(self): + # See issue #18106. This tests whether the solution is being + # checked correctly (status is 0) when integrality > 1: + # values are allowed to be 0 even if 0 is out of bounds. 
+ + c = np.array([1., 1., -1, -1]) + bounds = np.array([[0.5, 1.5], [0.5, 1.5], [0.5, 1.5], [0.5, 1.5]]) + integrality = np.array([2, 3, 2, 3]) + + res = linprog(c, bounds=bounds, + integrality=integrality, method='highs') + + np.testing.assert_allclose(res.x, [0, 0, 1.5, 1]) + assert res.status == 0 + + +########################### +# Autoscale-Specific Tests# +########################### + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class AutoscaleTests: + options = {"autoscale": True} + + test_bug_6139 = LinprogCommonTests.test_bug_6139 + test_bug_6690 = LinprogCommonTests.test_bug_6690 + test_bug_7237 = LinprogCommonTests.test_bug_7237 + + +class TestAutoscaleIP(AutoscaleTests): + method = "interior-point" + + def test_bug_6139(self): + self.options['tol'] = 1e-10 + return AutoscaleTests.test_bug_6139(self) + + +class TestAutoscaleSimplex(AutoscaleTests): + method = "simplex" + + +class TestAutoscaleRS(AutoscaleTests): + method = "revised simplex" + + def test_nontrivial_problem_with_guess(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + res = linprog(c, A_ub, b_ub, A_eq, b_eq, + method=self.method, options=self.options, x0=x_star) + _assert_success(res, desired_fun=f_star, desired_x=x_star) + assert_equal(res.nit, 0) + + def test_nontrivial_problem_with_bad_guess(self): + c, A_ub, b_ub, A_eq, b_eq, x_star, f_star = nontrivial_problem() + bad_guess = [1, 2, 3, .5] + res = linprog(c, A_ub, b_ub, A_eq, b_eq, + method=self.method, options=self.options, x0=bad_guess) + assert_equal(res.status, 6) + + +########################### +# Redundancy Removal Tests# +########################### + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class RRTests: + method = "interior-point" + LCT = LinprogCommonTests + # these are a few of the existing tests that have redundancy + test_RR_infeasibility = LCT.test_remove_redundancy_infeasibility + test_bug_10349 = LCT.test_bug_10349 + test_bug_7044 = LCT.test_bug_7044 + test_NFLC = LCT.test_network_flow_limited_capacity + test_enzo_example_b = LCT.test_enzo_example_b + + +class TestRRSVD(RRTests): + options = {"rr_method": "SVD"} + + +class TestRRPivot(RRTests): + options = {"rr_method": "pivot"} + + +class TestRRID(RRTests): + options = {"rr_method": "ID"} diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lsq_linear.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lsq_linear.py new file mode 100644 index 0000000000000000000000000000000000000000..a2fdd12218510be71bbe2c9009b2bad847967add --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_lsq_linear.py @@ -0,0 +1,285 @@ +import pytest + +import numpy as np +from numpy.linalg import lstsq +from numpy.testing import assert_allclose, assert_equal, assert_ + +from scipy.sparse import rand, coo_matrix +from scipy.sparse.linalg import aslinearoperator +from scipy.optimize import lsq_linear +from scipy.optimize._minimize import Bounds + + +A = np.array([ + [0.171, -0.057], + [-0.049, -0.248], + [-0.166, 0.054], +]) +b = np.array([0.074, 1.014, -0.383]) + + +class BaseMixin: + def setup_method(self): + self.rnd = np.random.RandomState(0) + + def test_dense_no_bounds(self): + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, method=self.method, lsq_solver=lsq_solver) + assert_allclose(res.x, lstsq(A, b, rcond=-1)[0]) + 
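# res.unbounded_sol holds the raw solution tuple returned by the + # unconstrained least-squares solver (numpy.linalg.lstsq or lsmr) +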
assert_allclose(res.x, res.unbounded_sol[0]) + + def test_dense_bounds(self): + # Solutions for comparison are taken from MATLAB. + lb = np.array([-1, -10]) + ub = np.array([1, 0]) + unbounded_sol = lstsq(A, b, rcond=-1)[0] + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, lstsq(A, b, rcond=-1)[0]) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + lb = np.array([0.0, -np.inf]) + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, np.inf), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, np.array([0.0, -4.084174437334673]), + atol=1e-6) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + lb = np.array([-1, 0]) + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, np.inf), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, np.array([0.448427311733504, 0]), + atol=1e-15) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + ub = np.array([np.inf, -5]) + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (-np.inf, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, np.array([-0.105560998682388, -5])) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + ub = np.array([-1, np.inf]) + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (-np.inf, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, np.array([-1, -4.181102129483254])) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + lb = np.array([0, -4]) + ub = np.array([1, 0]) + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, np.array([0.005236663400791, -4])) + assert_allclose(res.unbounded_sol[0], unbounded_sol) + + def test_bounds_variants(self): + x = np.array([1, 3]) + A = self.rnd.uniform(size=(2, 2)) + b = A@x + lb = np.array([1, 1]) + ub = np.array([2, 2]) + bounds_old = (lb, ub) + bounds_new = Bounds(lb, ub) + res_old = lsq_linear(A, b, bounds_old) + res_new = lsq_linear(A, b, bounds_new) + assert not np.allclose(res_new.x, res_new.unbounded_sol[0]) + assert_allclose(res_old.x, res_new.x) + + def test_np_matrix(self): + # gh-10711 + with np.testing.suppress_warnings() as sup: + sup.filter(PendingDeprecationWarning) + A = np.matrix([[20, -4, 0, 2, 3], [10, -2, 1, 0, -1]]) + k = np.array([20, 15]) + lsq_linear(A, k) + + def test_dense_rank_deficient(self): + A = np.array([[-0.307, -0.184]]) + b = np.array([0.773]) + lb = [-0.1, -0.1] + ub = [0.1, 0.1] + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.x, [-0.1, -0.1]) + assert_allclose(res.unbounded_sol[0], lstsq(A, b, rcond=-1)[0]) + + A = np.array([ + [0.334, 0.668], + [-0.516, -1.032], + [0.192, 0.384], + ]) + b = np.array([-1.436, 0.135, 0.909]) + lb = [0, -1] + ub = [1, -0.5] + for lsq_solver in self.lsq_solvers: + res = lsq_linear(A, b, (lb, ub), method=self.method, + lsq_solver=lsq_solver) + assert_allclose(res.optimality, 0, atol=1e-11) + assert_allclose(res.unbounded_sol[0], lstsq(A, b, rcond=-1)[0]) + + def test_full_result(self): + lb = np.array([0, -4]) + ub = np.array([1, 0]) + res = lsq_linear(A, b, (lb, ub), method=self.method) + + assert_allclose(res.x, [0.005236663400791, -4]) + assert_allclose(res.unbounded_sol[0], lstsq(A, b, rcond=-1)[0]) + + r = A.dot(res.x) - b + assert_allclose(res.cost, 0.5 * np.dot(r, r)) + 
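# by definition, res.cost = 0.5 * ||A @ x - b||**2 and res.fun = A @ x - b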
+ assert_allclose(res.fun, r) + + assert_allclose(res.optimality, 0.0, atol=1e-12) + assert_equal(res.active_mask, [0, -1]) + assert_(res.nit < 15) + assert_(res.status == 1 or res.status == 3) + assert_(isinstance(res.message, str)) + assert_(res.success) + + # This is a test for issue #9982. + def test_almost_singular(self): + A = np.array( + [[0.8854232310355122, 0.0365312146937765, 0.0365312146836789], + [0.3742460132129041, 0.0130523214078376, 0.0130523214077873], + [0.9680633871281361, 0.0319366128718639, 0.0319366128718388]]) + + b = np.array( + [0.0055029366538097, 0.0026677442422208, 0.0066612514782381]) + + result = lsq_linear(A, b, method=self.method) + assert_(result.cost < 1.1e-8) + + @pytest.mark.xslow + def test_large_rank_deficient(self): + np.random.seed(0) + n, m = np.sort(np.random.randint(2, 1000, size=2)) + m *= 2 # make m >> n + A = 1.0 * np.random.randint(-99, 99, size=[m, n]) + b = 1.0 * np.random.randint(-99, 99, size=[m]) + bounds = 1.0 * np.sort(np.random.randint(-99, 99, size=(2, n)), axis=0) + bounds[1, :] += 1.0 # ensure ub > lb + + # Make the A matrix strongly rank deficient by replicating some columns + w = np.random.choice(n, n) # Select random columns with duplicates + A = A[:, w] + + x_bvls = lsq_linear(A, b, bounds=bounds, method='bvls').x + x_trf = lsq_linear(A, b, bounds=bounds, method='trf').x + + cost_bvls = np.sum((A @ x_bvls - b)**2) + cost_trf = np.sum((A @ x_trf - b)**2) + + assert_(abs(cost_bvls - cost_trf) < cost_trf*1e-10) + + def test_convergence_small_matrix(self): + A = np.array([[49.0, 41.0, -32.0], + [-19.0, -32.0, -8.0], + [-13.0, 10.0, 69.0]]) + b = np.array([-41.0, -90.0, 47.0]) + bounds = np.array([[31.0, -44.0, 26.0], + [54.0, -32.0, 28.0]]) + + x_bvls = lsq_linear(A, b, bounds=bounds, method='bvls').x + x_trf = lsq_linear(A, b, bounds=bounds, method='trf').x + + cost_bvls = np.sum((A @ x_bvls - b)**2) + cost_trf = np.sum((A @ x_trf - b)**2) + + assert_(abs(cost_bvls - cost_trf) < cost_trf*1e-10) + + +class SparseMixin: + def test_sparse_and_LinearOperator(self): + m = 5000 + n = 1000 + A = rand(m, n, random_state=0) + b = self.rnd.randn(m) + res = lsq_linear(A, b) + assert_allclose(res.optimality, 0, atol=1e-6) + + A = aslinearoperator(A) + res = lsq_linear(A, b) + assert_allclose(res.optimality, 0, atol=1e-6) + + @pytest.mark.fail_slow(5) + def test_sparse_bounds(self): + m = 5000 + n = 1000 + A = rand(m, n, random_state=0) + b = self.rnd.randn(m) + lb = self.rnd.randn(n) + ub = lb + 1 + res = lsq_linear(A, b, (lb, ub)) + assert_allclose(res.optimality, 0.0, atol=1e-6) + + res = lsq_linear(A, b, (lb, ub), lsmr_tol=1e-13, + lsmr_maxiter=1500) + assert_allclose(res.optimality, 0.0, atol=1e-6) + + res = lsq_linear(A, b, (lb, ub), lsmr_tol='auto') + assert_allclose(res.optimality, 0.0, atol=1e-6) + + def test_sparse_ill_conditioned(self): + # Sparse matrix with condition number of ~4 million + data = np.array([1., 1., 1., 1. 
+ 1e-6, 1.]) + row = np.array([0, 0, 1, 2, 2]) + col = np.array([0, 2, 1, 0, 2]) + A = coo_matrix((data, (row, col)), shape=(3, 3)) + + # Get the exact solution + exact_sol = lsq_linear(A.toarray(), b, lsq_solver='exact') + + # Default lsmr arguments should not fully converge the solution + default_lsmr_sol = lsq_linear(A, b, lsq_solver='lsmr') + with pytest.raises(AssertionError, match=""): + assert_allclose(exact_sol.x, default_lsmr_sol.x) + + # By increasing the maximum lsmr iters, it will converge + conv_lsmr = lsq_linear(A, b, lsq_solver='lsmr', lsmr_maxiter=10) + assert_allclose(exact_sol.x, conv_lsmr.x) + + +class TestTRF(BaseMixin, SparseMixin): + method = 'trf' + lsq_solvers = ['exact', 'lsmr'] + + +class TestBVLS(BaseMixin): + method = 'bvls' + lsq_solvers = ['exact'] + + +class TestErrorChecking: + def test_option_lsmr_tol(self): + # Should work with a positive float, string equal to 'auto', or None + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol=1e-2) + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol='auto') + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol=None) + + # Should raise error with negative float, strings + # other than 'auto', and integers + err_message = "`lsmr_tol` must be None, 'auto', or positive float." + with pytest.raises(ValueError, match=err_message): + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol=-0.1) + with pytest.raises(ValueError, match=err_message): + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol='foo') + with pytest.raises(ValueError, match=err_message): + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_tol=1) + + def test_option_lsmr_maxiter(self): + # Should work with positive integers or None + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_maxiter=1) + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_maxiter=None) + + # Should raise error with 0 or negative max iter + err_message = "`lsmr_maxiter` must be None or positive integer." 
+ with pytest.raises(ValueError, match=err_message): + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_maxiter=0) + with pytest.raises(ValueError, match=err_message): + _ = lsq_linear(A, b, lsq_solver='lsmr', lsmr_maxiter=-1) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minimize_constrained.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minimize_constrained.py new file mode 100644 index 0000000000000000000000000000000000000000..ee700ec6e959dfdde8158d2bd09123cf602881c6 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minimize_constrained.py @@ -0,0 +1,828 @@ +import numpy as np +import pytest +from scipy.linalg import block_diag +from scipy.sparse import csc_matrix +from numpy.testing import (assert_array_almost_equal, + assert_array_less, assert_, assert_allclose, + suppress_warnings) +from scipy.optimize import (NonlinearConstraint, + LinearConstraint, + Bounds, + minimize, + BFGS, + SR1, + rosen) + + +class Maratos: + """Problem 15.4 from Nocedal and Wright + + The following optimization problem: + minimize 2*(x[0]**2 + x[1]**2 - 1) - x[0] + Subject to: x[0]**2 + x[1]**2 - 1 = 0 + """ + + def __init__(self, degrees=60, constr_jac=None, constr_hess=None): + rads = degrees/180*np.pi + self.x0 = [np.cos(rads), np.sin(rads)] + self.x_opt = np.array([1.0, 0.0]) + self.constr_jac = constr_jac + self.constr_hess = constr_hess + self.bounds = None + + def fun(self, x): + return 2*(x[0]**2 + x[1]**2 - 1) - x[0] + + def grad(self, x): + return np.array([4*x[0]-1, 4*x[1]]) + + def hess(self, x): + return 4*np.eye(2) + + @property + def constr(self): + def fun(x): + return x[0]**2 + x[1]**2 + + if self.constr_jac is None: + def jac(x): + return [[2*x[0], 2*x[1]]] + else: + jac = self.constr_jac + + if self.constr_hess is None: + def hess(x, v): + return 2*v[0]*np.eye(2) + else: + hess = self.constr_hess + + return NonlinearConstraint(fun, 1, 1, jac, hess) + + +class MaratosTestArgs: + """Problem 15.4 from Nocedal and Wright + + The following optimization problem: + minimize 2*(x[0]**2 + x[1]**2 - 1) - x[0] + Subject to: x[0]**2 + x[1]**2 - 1 = 0 + """ + + def __init__(self, a, b, degrees=60, constr_jac=None, constr_hess=None): + rads = degrees/180*np.pi + self.x0 = [np.cos(rads), np.sin(rads)] + self.x_opt = np.array([1.0, 0.0]) + self.constr_jac = constr_jac + self.constr_hess = constr_hess + self.a = a + self.b = b + self.bounds = None + + def _test_args(self, a, b): + if self.a != a or self.b != b: + raise ValueError() + + def fun(self, x, a, b): + self._test_args(a, b) + return 2*(x[0]**2 + x[1]**2 - 1) - x[0] + + def grad(self, x, a, b): + self._test_args(a, b) + return np.array([4*x[0]-1, 4*x[1]]) + + def hess(self, x, a, b): + self._test_args(a, b) + return 4*np.eye(2) + + @property + def constr(self): + def fun(x): + return x[0]**2 + x[1]**2 + + if self.constr_jac is None: + def jac(x): + return [[2*x[0], 2*x[1]]] + else: + jac = self.constr_jac + + if self.constr_hess is None: + def hess(x, v): + return 2*v[0]*np.eye(2) + else: + hess = self.constr_hess + + return NonlinearConstraint(fun, 1, 1, jac, hess) + + +class MaratosGradInFunc: + """Problem 15.4 from Nocedal and Wright + + The following optimization problem: + minimize 2*(x[0]**2 + x[1]**2 - 1) - x[0] + Subject to: x[0]**2 + x[1]**2 - 1 = 0 + """ + + def __init__(self, degrees=60, constr_jac=None, constr_hess=None): + rads = 
degrees/180*np.pi + self.x0 = [np.cos(rads), np.sin(rads)] + self.x_opt = np.array([1.0, 0.0]) + self.constr_jac = constr_jac + self.constr_hess = constr_hess + self.bounds = None + + def fun(self, x): + return (2*(x[0]**2 + x[1]**2 - 1) - x[0], + np.array([4*x[0]-1, 4*x[1]])) + + @property + def grad(self): + return True + + def hess(self, x): + return 4*np.eye(2) + + @property + def constr(self): + def fun(x): + return x[0]**2 + x[1]**2 + + if self.constr_jac is None: + def jac(x): + return [[2*x[0], 2*x[1]]] + else: + jac = self.constr_jac + + if self.constr_hess is None: + def hess(x, v): + return 2*v[0]*np.eye(2) + else: + hess = self.constr_hess + + return NonlinearConstraint(fun, 1, 1, jac, hess) + + +class HyperbolicIneq: + """Problem 15.1 from Nocedal and Wright + + The following optimization problem: + minimize 1/2*(x[0] - 2)**2 + 1/2*(x[1] - 1/2)**2 + Subject to: 1/(x[0] + 1) - x[1] >= 1/4 + x[0] >= 0 + x[1] >= 0 + """ + def __init__(self, constr_jac=None, constr_hess=None): + self.x0 = [0, 0] + self.x_opt = [1.952823, 0.088659] + self.constr_jac = constr_jac + self.constr_hess = constr_hess + self.bounds = Bounds(0, np.inf) + + def fun(self, x): + return 1/2*(x[0] - 2)**2 + 1/2*(x[1] - 1/2)**2 + + def grad(self, x): + return [x[0] - 2, x[1] - 1/2] + + def hess(self, x): + return np.eye(2) + + @property + def constr(self): + def fun(x): + return 1/(x[0] + 1) - x[1] + + if self.constr_jac is None: + def jac(x): + return [[-1/(x[0] + 1)**2, -1]] + else: + jac = self.constr_jac + + if self.constr_hess is None: + def hess(x, v): + return 2*v[0]*np.array([[1/(x[0] + 1)**3, 0], + [0, 0]]) + else: + hess = self.constr_hess + + return NonlinearConstraint(fun, 0.25, np.inf, jac, hess) + + +class Rosenbrock: + """Rosenbrock function. + + The following optimization problem: + minimize sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0) + """ + + def __init__(self, n=2, random_state=0): + rng = np.random.RandomState(random_state) + self.x0 = rng.uniform(-1, 1, n) + self.x_opt = np.ones(n) + self.bounds = None + + def fun(self, x): + x = np.asarray(x) + r = np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0, + axis=0) + return r + + def grad(self, x): + x = np.asarray(x) + xm = x[1:-1] + xm_m1 = x[:-2] + xm_p1 = x[2:] + der = np.zeros_like(x) + der[1:-1] = (200 * (xm - xm_m1**2) - + 400 * (xm_p1 - xm**2) * xm - 2 * (1 - xm)) + der[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0]) + der[-1] = 200 * (x[-1] - x[-2]**2) + return der + + def hess(self, x): + x = np.atleast_1d(x) + H = np.diag(-400 * x[:-1], 1) - np.diag(400 * x[:-1], -1) + diagonal = np.zeros(len(x), dtype=x.dtype) + diagonal[0] = 1200 * x[0]**2 - 400 * x[1] + 2 + diagonal[-1] = 200 + diagonal[1:-1] = 202 + 1200 * x[1:-1]**2 - 400 * x[2:] + H = H + np.diag(diagonal) + return H + + @property + def constr(self): + return () + + +class IneqRosenbrock(Rosenbrock): + """Rosenbrock subject to inequality constraints. + + The following optimization problem: + minimize sum(100.0*(x[1] - x[0]**2)**2.0 + (1 - x[0])**2) + subject to: x[0] + 2 x[1] <= 1 + + Taken from matlab ``fmincon`` documentation. + """ + def __init__(self, random_state=0): + Rosenbrock.__init__(self, 2, random_state) + self.x0 = [-1, -0.5] + self.x_opt = [0.5022, 0.2489] + self.bounds = None + + @property + def constr(self): + A = [[1, 2]] + b = 1 + return LinearConstraint(A, -np.inf, b) + + +class BoundedRosenbrock(Rosenbrock): + """Rosenbrock subject to inequality constraints. 
+ + The following optimization problem: + minimize sum(100.0*(x[1] - x[0]**2)**2.0 + (1 - x[0])**2) + subject to: -2 <= x[0] <= 0 + 0 <= x[1] <= 2 + + Taken from matlab ``fmincon`` documentation. + """ + def __init__(self, random_state=0): + Rosenbrock.__init__(self, 2, random_state) + self.x0 = [-0.2, 0.2] + self.x_opt = None + self.bounds = Bounds([-2, 0], [0, 2]) + + +class EqIneqRosenbrock(Rosenbrock): + """Rosenbrock subject to equality and inequality constraints. + + The following optimization problem: + minimize sum(100.0*(x[1] - x[0]**2)**2.0 + (1 - x[0])**2) + subject to: x[0] + 2 x[1] <= 1 + 2 x[0] + x[1] = 1 + + Taken from matlab ``fmincon`` documentation. + """ + def __init__(self, random_state=0): + Rosenbrock.__init__(self, 2, random_state) + self.x0 = [-1, -0.5] + self.x_opt = [0.41494, 0.17011] + self.bounds = None + + @property + def constr(self): + A_ineq = [[1, 2]] + b_ineq = 1 + A_eq = [[2, 1]] + b_eq = 1 + return (LinearConstraint(A_ineq, -np.inf, b_ineq), + LinearConstraint(A_eq, b_eq, b_eq)) + + +class Elec: + """Distribution of electrons on a sphere. + + Problem no 2 from COPS collection [1]_. Find + the equilibrium state distribution (of minimal + potential) of the electrons positioned on a + conducting sphere. + + References + ---------- + .. [1] E. D. Dolan, J. J. Mor\'{e}, and T. S. Munson, + "Benchmarking optimization software with COPS 3.0.", + Argonne National Lab., Argonne, IL (US), 2004. + """ + def __init__(self, n_electrons=200, random_state=0, + constr_jac=None, constr_hess=None): + self.n_electrons = n_electrons + self.rng = np.random.RandomState(random_state) + # Initial Guess + phi = self.rng.uniform(0, 2 * np.pi, self.n_electrons) + theta = self.rng.uniform(-np.pi, np.pi, self.n_electrons) + x = np.cos(theta) * np.cos(phi) + y = np.cos(theta) * np.sin(phi) + z = np.sin(theta) + self.x0 = np.hstack((x, y, z)) + self.x_opt = None + self.constr_jac = constr_jac + self.constr_hess = constr_hess + self.bounds = None + + def _get_cordinates(self, x): + x_coord = x[:self.n_electrons] + y_coord = x[self.n_electrons:2 * self.n_electrons] + z_coord = x[2 * self.n_electrons:] + return x_coord, y_coord, z_coord + + def _compute_coordinate_deltas(self, x): + x_coord, y_coord, z_coord = self._get_cordinates(x) + dx = x_coord[:, None] - x_coord + dy = y_coord[:, None] - y_coord + dz = z_coord[:, None] - z_coord + return dx, dy, dz + + def fun(self, x): + dx, dy, dz = self._compute_coordinate_deltas(x) + with np.errstate(divide='ignore'): + dm1 = (dx**2 + dy**2 + dz**2) ** -0.5 + dm1[np.diag_indices_from(dm1)] = 0 + return 0.5 * np.sum(dm1) + + def grad(self, x): + dx, dy, dz = self._compute_coordinate_deltas(x) + + with np.errstate(divide='ignore'): + dm3 = (dx**2 + dy**2 + dz**2) ** -1.5 + dm3[np.diag_indices_from(dm3)] = 0 + + grad_x = -np.sum(dx * dm3, axis=1) + grad_y = -np.sum(dy * dm3, axis=1) + grad_z = -np.sum(dz * dm3, axis=1) + + return np.hstack((grad_x, grad_y, grad_z)) + + def hess(self, x): + dx, dy, dz = self._compute_coordinate_deltas(x) + d = (dx**2 + dy**2 + dz**2) ** 0.5 + + with np.errstate(divide='ignore'): + dm3 = d ** -3 + dm5 = d ** -5 + + i = np.arange(self.n_electrons) + dm3[i, i] = 0 + dm5[i, i] = 0 + + Hxx = dm3 - 3 * dx**2 * dm5 + Hxx[i, i] = -np.sum(Hxx, axis=1) + + Hxy = -3 * dx * dy * dm5 + Hxy[i, i] = -np.sum(Hxy, axis=1) + + Hxz = -3 * dx * dz * dm5 + Hxz[i, i] = -np.sum(Hxz, axis=1) + + Hyy = dm3 - 3 * dy**2 * dm5 + Hyy[i, i] = -np.sum(Hyy, axis=1) + + Hyz = -3 * dy * dz * dm5 + Hyz[i, i] = -np.sum(Hyz, axis=1) + + Hzz = dm3 - 3 
* dz**2 * dm5 + Hzz[i, i] = -np.sum(Hzz, axis=1) + + H = np.vstack(( + np.hstack((Hxx, Hxy, Hxz)), + np.hstack((Hxy, Hyy, Hyz)), + np.hstack((Hxz, Hyz, Hzz)) + )) + + return H + + @property + def constr(self): + def fun(x): + x_coord, y_coord, z_coord = self._get_cordinates(x) + return x_coord**2 + y_coord**2 + z_coord**2 - 1 + + if self.constr_jac is None: + def jac(x): + x_coord, y_coord, z_coord = self._get_cordinates(x) + Jx = 2 * np.diag(x_coord) + Jy = 2 * np.diag(y_coord) + Jz = 2 * np.diag(z_coord) + return csc_matrix(np.hstack((Jx, Jy, Jz))) + else: + jac = self.constr_jac + + if self.constr_hess is None: + def hess(x, v): + D = 2 * np.diag(v) + return block_diag(D, D, D) + else: + hess = self.constr_hess + + return NonlinearConstraint(fun, -np.inf, 0, jac, hess) + + +class TestTrustRegionConstr: + list_of_problems = [Maratos(), + Maratos(constr_hess='2-point'), + Maratos(constr_hess=SR1()), + Maratos(constr_jac='2-point', constr_hess=SR1()), + MaratosGradInFunc(), + HyperbolicIneq(), + HyperbolicIneq(constr_hess='3-point'), + HyperbolicIneq(constr_hess=BFGS()), + HyperbolicIneq(constr_jac='3-point', + constr_hess=BFGS()), + Rosenbrock(), + IneqRosenbrock(), + EqIneqRosenbrock(), + BoundedRosenbrock(), + Elec(n_electrons=2), + Elec(n_electrons=2, constr_hess='2-point'), + Elec(n_electrons=2, constr_hess=SR1()), + Elec(n_electrons=2, constr_jac='3-point', + constr_hess=SR1())] + + @pytest.mark.parametrize('prob', list_of_problems) + @pytest.mark.parametrize('grad', ('prob.grad', '3-point', False)) + @pytest.mark.parametrize('hess', ("prob.hess", '3-point', SR1(), + BFGS(exception_strategy='damp_update'), + BFGS(exception_strategy='skip_update'))) + def test_list_of_problems(self, prob, grad, hess): + grad = prob.grad if grad == "prob.grad" else grad + hess = prob.hess if hess == "prob.hess" else hess + # Remove exceptions + if (grad in {'2-point', '3-point', 'cs', False} and + hess in {'2-point', '3-point', 'cs'}): + pytest.skip("Numerical Hessian needs analytical gradient") + if prob.grad is True and grad in {'3-point', False}: + pytest.skip("prob.grad incompatible with grad in {'3-point', False}") + sensitive = (isinstance(prob, BoundedRosenbrock) and grad == '3-point' + and isinstance(hess, BFGS)) + if sensitive: + pytest.xfail("Seems sensitive to initial conditions w/ Accelerate") + with suppress_warnings() as sup: + sup.filter(UserWarning, "delta_grad == 0.0") + result = minimize(prob.fun, prob.x0, + method='trust-constr', + jac=grad, hess=hess, + bounds=prob.bounds, + constraints=prob.constr) + + if prob.x_opt is not None: + assert_array_almost_equal(result.x, prob.x_opt, + decimal=5) + # gtol + if result.status == 1: + assert_array_less(result.optimality, 1e-8) + # xtol + if result.status == 2: + assert_array_less(result.tr_radius, 1e-8) + + if result.method == "tr_interior_point": + assert_array_less(result.barrier_parameter, 1e-8) + + # check for max iter + message = f"Invalid termination condition: {result.status}." 
+ assert result.status not in {0, 3}, message + + + def test_default_jac_and_hess(self): + def fun(x): + return (x - 1) ** 2 + bounds = [(-2, 2)] + res = minimize(fun, x0=[-1.5], bounds=bounds, method='trust-constr') + assert_array_almost_equal(res.x, 1, decimal=5) + + def test_default_hess(self): + def fun(x): + return (x - 1) ** 2 + bounds = [(-2, 2)] + res = minimize(fun, x0=[-1.5], bounds=bounds, method='trust-constr', + jac='2-point') + assert_array_almost_equal(res.x, 1, decimal=5) + + def test_no_constraints(self): + prob = Rosenbrock() + result = minimize(prob.fun, prob.x0, + method='trust-constr', + jac=prob.grad, hess=prob.hess) + result1 = minimize(prob.fun, prob.x0, + method='L-BFGS-B', + jac='2-point') + + result2 = minimize(prob.fun, prob.x0, + method='L-BFGS-B', + jac='3-point') + assert_array_almost_equal(result.x, prob.x_opt, decimal=5) + assert_array_almost_equal(result1.x, prob.x_opt, decimal=5) + assert_array_almost_equal(result2.x, prob.x_opt, decimal=5) + + def test_hessp(self): + prob = Maratos() + + def hessp(x, p): + H = prob.hess(x) + return H.dot(p) + + result = minimize(prob.fun, prob.x0, + method='trust-constr', + jac=prob.grad, hessp=hessp, + bounds=prob.bounds, + constraints=prob.constr) + + if prob.x_opt is not None: + assert_array_almost_equal(result.x, prob.x_opt, decimal=2) + + # gtol + if result.status == 1: + assert_array_less(result.optimality, 1e-8) + # xtol + if result.status == 2: + assert_array_less(result.tr_radius, 1e-8) + + if result.method == "tr_interior_point": + assert_array_less(result.barrier_parameter, 1e-8) + # max iter + if result.status in (0, 3): + raise RuntimeError("Invalid termination condition.") + + def test_args(self): + prob = MaratosTestArgs("a", 234) + + result = minimize(prob.fun, prob.x0, ("a", 234), + method='trust-constr', + jac=prob.grad, hess=prob.hess, + bounds=prob.bounds, + constraints=prob.constr) + + if prob.x_opt is not None: + assert_array_almost_equal(result.x, prob.x_opt, decimal=2) + + # gtol + if result.status == 1: + assert_array_less(result.optimality, 1e-8) + # xtol + if result.status == 2: + assert_array_less(result.tr_radius, 1e-8) + if result.method == "tr_interior_point": + assert_array_less(result.barrier_parameter, 1e-8) + # max iter + if result.status in (0, 3): + raise RuntimeError("Invalid termination condition.") + + def test_raise_exception(self): + prob = Maratos() + message = "Whenever the gradient is estimated via finite-differences" + with pytest.raises(ValueError, match=message): + minimize(prob.fun, prob.x0, method='trust-constr', jac='2-point', + hess='2-point', constraints=prob.constr) + + def test_issue_9044(self): + # https://github.com/scipy/scipy/issues/9044 + # Test the returned `OptimizeResult` contains keys consistent with + # other solvers. + + def callback(x, info): + assert_('nit' in info) + assert_('niter' in info) + + result = minimize(lambda x: x**2, [0], jac=lambda x: 2*x, + hess=lambda x: 2, callback=callback, + method='trust-constr') + assert_(result.get('success')) + assert_(result.get('nit', -1) == 1) + + # Also check existence of the 'niter' attribute, for backward + # compatibility + assert_(result.get('niter', -1) == 1) + + def test_issue_15093(self): + # scipy docs define bounds as inclusive, so it shouldn't be + # an issue to set x0 on the bounds even if keep_feasible is + # True. Previously, trust-constr would treat bounds as + # exclusive. 
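+ # here x0[0] sits exactly on the lower bound, so trust-constr must treat + # it as feasible rather than clipping or rejecting it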
+ + x0 = np.array([0., 0.5]) + + def obj(x): + x1 = x[0] + x2 = x[1] + return x1 ** 2 + x2 ** 2 + + bounds = Bounds(np.array([0., 0.]), np.array([1., 1.]), + keep_feasible=True) + + with suppress_warnings() as sup: + sup.filter(UserWarning, "delta_grad == 0.0") + result = minimize( + method='trust-constr', + fun=obj, + x0=x0, + bounds=bounds) + + assert result['success'] + +class TestEmptyConstraint: + """ + Here we minimize x^2+y^2 subject to x^2-y^2>1. + The actual minimum is at (0, 0) which fails the constraint. + Therefore we will find a minimum on the boundary at (+/-1, 0). + + When minimizing on the boundary, optimize uses a set of + constraints that removes the constraint that sets that + boundary. In our case, there's only one constraint, so + the result is an empty constraint. + + This tests that the empty constraint works. + """ + def test_empty_constraint(self): + + def function(x): + return x[0]**2 + x[1]**2 + + def functionjacobian(x): + return np.array([2.*x[0], 2.*x[1]]) + + def functionhvp(x, v): + return 2.*v + + def constraint(x): + return np.array([x[0]**2 - x[1]**2]) + + def constraintjacobian(x): + return np.array([[2*x[0], -2*x[1]]]) + + def constraintlcoh(x, v): + return np.array([[2., 0.], [0., -2.]]) * v[0] + + constraint = NonlinearConstraint(constraint, 1., np.inf, + constraintjacobian, constraintlcoh) + + startpoint = [1., 2.] + + bounds = Bounds([-np.inf, -np.inf], [np.inf, np.inf]) + + result = minimize( + function, + startpoint, + method='trust-constr', + jac=functionjacobian, + hessp=functionhvp, + constraints=[constraint], + bounds=bounds, + ) + + assert_array_almost_equal(abs(result.x), np.array([1, 0]), decimal=4) + + +def test_bug_11886(): + def opt(x): + return x[0]**2+x[1]**2 + + with np.testing.suppress_warnings() as sup: + sup.filter(PendingDeprecationWarning) + A = np.matrix(np.diag([1, 1])) + lin_cons = LinearConstraint(A, -1, np.inf) + # just checking that there are no errors + minimize(opt, 2*[1], constraints = lin_cons) + + +# Remove xfail when gh-11649 is resolved +@pytest.mark.xfail(reason="Known bug in trust-constr; see gh-11649.", + strict=True) +def test_gh11649(): + bnds = Bounds(lb=[-1, -1], ub=[1, 1], keep_feasible=True) + + def assert_inbounds(x): + assert np.all(x >= bnds.lb) + assert np.all(x <= bnds.ub) + + def obj(x): + assert_inbounds(x) + return np.exp(x[0])*(4*x[0]**2 + 2*x[1]**2 + 4*x[0]*x[1] + 2*x[1] + 1) + + def nce(x): + assert_inbounds(x) + return x[0]**2 + x[1] + + def nci(x): + assert_inbounds(x) + return x[0]*x[1] + + x0 = np.array((0.99, -0.99)) + nlcs = [NonlinearConstraint(nci, -10, np.inf), + NonlinearConstraint(nce, 1, 1)] + + res = minimize(fun=obj, x0=x0, method='trust-constr', + bounds=bnds, constraints=nlcs) + assert res.success + assert_inbounds(res.x) + assert nlcs[0].lb < nlcs[0].fun(res.x) < nlcs[0].ub + assert_allclose(nce(res.x), nlcs[1].ub) + + ref = minimize(fun=obj, x0=x0, method='slsqp', + bounds=bnds, constraints=nlcs) + assert_allclose(res.fun, ref.fun) + + +def test_gh20665_too_many_constraints(): + # gh-20665 reports a confusing error message when there are more equality + # constraints than variables. Check that the error message is improved. + message = "...more equality constraints than independent variables..." 
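+ # (the constraint below imposes three equations on two unknowns)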
+ with pytest.raises(ValueError, match=message): + x0 = np.ones((2,)) + A_eq, b_eq = np.arange(6).reshape((3, 2)), np.ones((3,)) + g = NonlinearConstraint(lambda x: A_eq @ x, lb=b_eq, ub=b_eq) + minimize(rosen, x0, method='trust-constr', constraints=[g]) + # no error with `SVDFactorization` + with np.testing.suppress_warnings() as sup: + sup.filter(UserWarning) + minimize(rosen, x0, method='trust-constr', constraints=[g], + options={'factorization_method': 'SVDFactorization'}) + + +class TestBoundedNelderMead: + + @pytest.mark.parametrize('bounds, x_opt', + [(Bounds(-np.inf, np.inf), Rosenbrock().x_opt), + (Bounds(-np.inf, -0.8), [-0.8, -0.8]), + (Bounds(3.0, np.inf), [3.0, 9.0]), + (Bounds([3.0, 1.0], [4.0, 5.0]), [3., 5.]), + ]) + def test_rosen_brock_with_bounds(self, bounds, x_opt): + prob = Rosenbrock() + with suppress_warnings() as sup: + sup.filter(UserWarning, "Initial guess is not within " + "the specified bounds") + result = minimize(prob.fun, [-10, -10], + method='Nelder-Mead', + bounds=bounds) + assert np.less_equal(bounds.lb, result.x).all() + assert np.less_equal(result.x, bounds.ub).all() + assert np.allclose(prob.fun(result.x), result.fun) + assert np.allclose(result.x, x_opt, atol=1.e-3) + + def test_equal_all_bounds(self): + prob = Rosenbrock() + bounds = Bounds([4.0, 5.0], [4.0, 5.0]) + with suppress_warnings() as sup: + sup.filter(UserWarning, "Initial guess is not within " + "the specified bounds") + result = minimize(prob.fun, [-10, 8], + method='Nelder-Mead', + bounds=bounds) + assert np.allclose(result.x, [4.0, 5.0]) + + def test_equal_one_bounds(self): + prob = Rosenbrock() + bounds = Bounds([4.0, 5.0], [4.0, 20.0]) + with suppress_warnings() as sup: + sup.filter(UserWarning, "Initial guess is not within " + "the specified bounds") + result = minimize(prob.fun, [-10, 8], + method='Nelder-Mead', + bounds=bounds) + assert np.allclose(result.x, [4.0, 16.0]) + + def test_invalid_bounds(self): + prob = Rosenbrock() + message = 'An upper bound is less than the corresponding lower bound.' + with pytest.raises(ValueError, match=message): + bounds = Bounds([-np.inf, 1.0], [4.0, -5.0]) + minimize(prob.fun, [-10, 3], + method='Nelder-Mead', + bounds=bounds) + + @pytest.mark.xfail(reason="Failing on Azure Linux and macOS builds, " + "see gh-13846") + def test_outside_bounds_warning(self): + prob = Rosenbrock() + message = "Initial guess is not within the specified bounds" + with pytest.warns(UserWarning, match=message): + bounds = Bounds([-np.inf, 1.0], [4.0, 5.0]) + minimize(prob.fun, [-10, 8], + method='Nelder-Mead', + bounds=bounds) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minpack.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minpack.py new file mode 100644 index 0000000000000000000000000000000000000000..b040b1e1253181979726edbd9f717860f6215712 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_minpack.py @@ -0,0 +1,1121 @@ +""" +Unit tests for optimization routines from minpack.py. 
+""" +import warnings +import pytest + +from numpy.testing import (assert_, assert_almost_equal, assert_array_equal, + assert_array_almost_equal, assert_allclose, + assert_warns, suppress_warnings) +from pytest import raises as assert_raises +import numpy as np +from numpy import array, float64 +from multiprocessing.pool import ThreadPool + +from scipy import optimize, linalg +from scipy.special import lambertw +from scipy.optimize._minpack_py import leastsq, curve_fit, fixed_point +from scipy.optimize import OptimizeWarning +from scipy.optimize._minimize import Bounds + + +class ReturnShape: + """This class exists to create a callable that does not have a '__name__' attribute. + + __init__ takes the argument 'shape', which should be a tuple of ints. + When an instance is called with a single argument 'x', it returns numpy.ones(shape). + """ + + def __init__(self, shape): + self.shape = shape + + def __call__(self, x): + return np.ones(self.shape) + + +def dummy_func(x, shape): + """A function that returns an array of ones of the given shape. + `x` is ignored. + """ + return np.ones(shape) + + +def sequence_parallel(fs): + with ThreadPool(len(fs)) as pool: + return pool.map(lambda f: f(), fs) + + +# Function and Jacobian for tests of solvers for systems of nonlinear +# equations + + +def pressure_network(flow_rates, Qtot, k): + """Evaluate non-linear equation system representing + the pressures and flows in a system of n parallel pipes:: + + f_i = P_i - P_0, for i = 1..n + f_0 = sum(Q_i) - Qtot + + where Q_i is the flow rate in pipe i and P_i the pressure in that pipe. + Pressure is modeled as a P=kQ**2 where k is a valve coefficient and + Q is the flow rate. + + Parameters + ---------- + flow_rates : float + A 1-D array of n flow rates [kg/s]. + k : float + A 1-D array of n valve coefficients [1/kg m]. + Qtot : float + A scalar, the total input flow rate [kg/s]. + + Returns + ------- + F : float + A 1-D array, F[i] == f_i. + + """ + P = k * flow_rates**2 + F = np.hstack((P[1:] - P[0], flow_rates.sum() - Qtot)) + return F + + +def pressure_network_jacobian(flow_rates, Qtot, k): + """Return the jacobian of the equation system F(flow_rates) + computed by `pressure_network` with respect to + *flow_rates*. See `pressure_network` for the detailed + description of parameters. + + Returns + ------- + jac : float + *n* by *n* matrix ``df_i/dQ_i`` where ``n = len(flow_rates)`` + and *f_i* and *Q_i* are described in the doc for `pressure_network` + """ + n = len(flow_rates) + pdiff = np.diag(flow_rates[1:] * 2 * k[1:] - 2 * flow_rates[0] * k[0]) + + jac = np.empty((n, n)) + jac[:n-1, :n-1] = pdiff * 0 + jac[:n-1, n-1] = 0 + jac[n-1, :] = np.ones(n) + + return jac + + +def pressure_network_fun_and_grad(flow_rates, Qtot, k): + return (pressure_network(flow_rates, Qtot, k), + pressure_network_jacobian(flow_rates, Qtot, k)) + + +class TestFSolve: + def test_pressure_network_no_gradient(self): + # fsolve without gradient, equal pipes -> equal flows. 
+ k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows, info, ier, mesg = optimize.fsolve( + pressure_network, initial_guess, args=(Qtot, k), + full_output=True) + assert_array_almost_equal(final_flows, np.ones(4)) + assert_(ier == 1, mesg) + + def test_pressure_network_with_gradient(self): + # fsolve with gradient, equal pipes -> equal flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows = optimize.fsolve( + pressure_network, initial_guess, args=(Qtot, k), + fprime=pressure_network_jacobian) + assert_array_almost_equal(final_flows, np.ones(4)) + + def test_wrong_shape_func_callable(self): + func = ReturnShape(1) + # x0 is a list of two elements, but func will return an array with + # length 1, so this should result in a TypeError. + x0 = [1.5, 2.0] + assert_raises(TypeError, optimize.fsolve, func, x0) + + def test_wrong_shape_func_function(self): + # x0 is a list of two elements, but func will return an array with + # length 1, so this should result in a TypeError. + x0 = [1.5, 2.0] + assert_raises(TypeError, optimize.fsolve, dummy_func, x0, args=((1,),)) + + def test_wrong_shape_fprime_callable(self): + func = ReturnShape(1) + deriv_func = ReturnShape((2,2)) + assert_raises(TypeError, optimize.fsolve, func, x0=[0,1], fprime=deriv_func) + + def test_wrong_shape_fprime_function(self): + def func(x): + return dummy_func(x, (2,)) + def deriv_func(x): + return dummy_func(x, (3, 3)) + assert_raises(TypeError, optimize.fsolve, func, x0=[0,1], fprime=deriv_func) + + def test_func_can_raise(self): + def func(*args): + raise ValueError('I raised') + + with assert_raises(ValueError, match='I raised'): + optimize.fsolve(func, x0=[0]) + + def test_Dfun_can_raise(self): + def func(x): + return x - np.array([10]) + + def deriv_func(*args): + raise ValueError('I raised') + + with assert_raises(ValueError, match='I raised'): + optimize.fsolve(func, x0=[0], fprime=deriv_func) + + def test_float32(self): + def func(x): + return np.array([x[0] - 100, x[1] - 1000], dtype=np.float32) ** 2 + p = optimize.fsolve(func, np.array([1, 1], np.float32)) + assert_allclose(func(p), [0, 0], atol=1e-3) + + def test_reentrant_func(self): + def func(*args): + self.test_pressure_network_no_gradient() + return pressure_network(*args) + + # fsolve without gradient, equal pipes -> equal flows. 
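+ # (func re-enters fsolve through test_pressure_network_no_gradient + # before returning the residuals)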
+ k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows, info, ier, mesg = optimize.fsolve( + func, initial_guess, args=(Qtot, k), + full_output=True) + assert_array_almost_equal(final_flows, np.ones(4)) + assert_(ier == 1, mesg) + + def test_reentrant_Dfunc(self): + def deriv_func(*args): + self.test_pressure_network_with_gradient() + return pressure_network_jacobian(*args) + + # fsolve with gradient, equal pipes -> equal flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows = optimize.fsolve( + pressure_network, initial_guess, args=(Qtot, k), + fprime=deriv_func) + assert_array_almost_equal(final_flows, np.ones(4)) + + def test_concurrent_no_gradient(self): + v = sequence_parallel([self.test_pressure_network_no_gradient] * 10) + assert all([result is None for result in v]) + + def test_concurrent_with_gradient(self): + v = sequence_parallel([self.test_pressure_network_with_gradient] * 10) + assert all([result is None for result in v]) + + +class TestRootHybr: + def test_pressure_network_no_gradient(self): + # root/hybr without gradient, equal pipes -> equal flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows = optimize.root(pressure_network, initial_guess, + method='hybr', args=(Qtot, k)).x + assert_array_almost_equal(final_flows, np.ones(4)) + + def test_pressure_network_with_gradient(self): + # root/hybr with gradient, equal pipes -> equal flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([[2., 0., 2., 0.]]) + final_flows = optimize.root(pressure_network, initial_guess, + args=(Qtot, k), method='hybr', + jac=pressure_network_jacobian).x + assert_array_almost_equal(final_flows, np.ones(4)) + + def test_pressure_network_with_gradient_combined(self): + # root/hybr with gradient and function combined, equal pipes -> equal + # flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows = optimize.root(pressure_network_fun_and_grad, + initial_guess, args=(Qtot, k), + method='hybr', jac=True).x + assert_array_almost_equal(final_flows, np.ones(4)) + + +class TestRootLM: + def test_pressure_network_no_gradient(self): + # root/lm without gradient, equal pipes -> equal flows + k = np.full(4, 0.5) + Qtot = 4 + initial_guess = array([2., 0., 2., 0.]) + final_flows = optimize.root(pressure_network, initial_guess, + method='lm', args=(Qtot, k)).x + assert_array_almost_equal(final_flows, np.ones(4)) + + +class TestNfev: + def zero_f(self, y): + self.nfev += 1 + return y**2-3 + + @pytest.mark.parametrize('method', ['hybr', 'lm', 'broyden1', + 'broyden2', 'anderson', + 'linearmixing', 'diagbroyden', + 'excitingmixing', 'krylov', + 'df-sane']) + def test_root_nfev(self, method): + self.nfev = 0 + solution = optimize.root(self.zero_f, 100, method=method) + assert solution.nfev == self.nfev + + def test_fsolve_nfev(self): + self.nfev = 0 + x, info, ier, mesg = optimize.fsolve(self.zero_f, 100, full_output=True) + assert info['nfev'] == self.nfev + + +class TestLeastSq: + def setup_method(self): + x = np.linspace(0, 10, 40) + a,b,c = 3.1, 42, -304.2 + self.x = x + self.abc = a,b,c + y_true = a*x**2 + b*x + c + np.random.seed(0) + self.y_meas = y_true + 0.01*np.random.standard_normal(y_true.shape) + + def residuals(self, p, y, x): + a,b,c = p + err = y-(a*x**2 + b*x + c) + return err + + def residuals_jacobian(self, _p, _y, x): + return -np.vstack([x**2, x, np.ones_like(x)]).T + + def test_basic(self): + p0 = array([0,0,0]) + params_fit, ier = 
leastsq(self.residuals, p0, + args=(self.y_meas, self.x)) + assert_(ier in (1,2,3,4), 'solution not found (ier=%d)' % ier) + # low precision due to random + assert_array_almost_equal(params_fit, self.abc, decimal=2) + + def test_basic_with_gradient(self): + p0 = array([0,0,0]) + params_fit, ier = leastsq(self.residuals, p0, + args=(self.y_meas, self.x), + Dfun=self.residuals_jacobian) + assert_(ier in (1,2,3,4), 'solution not found (ier=%d)' % ier) + # low precision due to random + assert_array_almost_equal(params_fit, self.abc, decimal=2) + + def test_full_output(self): + p0 = array([[0,0,0]]) + full_output = leastsq(self.residuals, p0, + args=(self.y_meas, self.x), + full_output=True) + params_fit, cov_x, infodict, mesg, ier = full_output + assert_(ier in (1,2,3,4), f'solution not found: {mesg}') + + def test_input_untouched(self): + p0 = array([0,0,0],dtype=float64) + p0_copy = array(p0, copy=True) + full_output = leastsq(self.residuals, p0, + args=(self.y_meas, self.x), + full_output=True) + params_fit, cov_x, infodict, mesg, ier = full_output + assert_(ier in (1,2,3,4), f'solution not found: {mesg}') + assert_array_equal(p0, p0_copy) + + def test_wrong_shape_func_callable(self): + func = ReturnShape(1) + # x0 is a list of two elements, but func will return an array with + # length 1, so this should result in a TypeError. + x0 = [1.5, 2.0] + assert_raises(TypeError, optimize.leastsq, func, x0) + + def test_wrong_shape_func_function(self): + # x0 is a list of two elements, but func will return an array with + # length 1, so this should result in a TypeError. + x0 = [1.5, 2.0] + assert_raises(TypeError, optimize.leastsq, dummy_func, x0, args=((1,),)) + + def test_wrong_shape_Dfun_callable(self): + func = ReturnShape(1) + deriv_func = ReturnShape((2,2)) + assert_raises(TypeError, optimize.leastsq, func, x0=[0,1], Dfun=deriv_func) + + def test_wrong_shape_Dfun_function(self): + def func(x): + return dummy_func(x, (2,)) + def deriv_func(x): + return dummy_func(x, (3, 3)) + assert_raises(TypeError, optimize.leastsq, func, x0=[0,1], Dfun=deriv_func) + + def test_float32(self): + # Regression test for gh-1447 + def func(p,x,y): + q = p[0]*np.exp(-(x-p[1])**2/(2.0*p[2]**2))+p[3] + return q - y + + x = np.array([1.475,1.429,1.409,1.419,1.455,1.519,1.472, 1.368,1.286, + 1.231], dtype=np.float32) + y = np.array([0.0168,0.0193,0.0211,0.0202,0.0171,0.0151,0.0185,0.0258, + 0.034,0.0396], dtype=np.float32) + p0 = np.array([1.0,1.0,1.0,1.0]) + p1, success = optimize.leastsq(func, p0, args=(x,y)) + + assert_(success in [1,2,3,4]) + assert_((func(p1,x,y)**2).sum() < 1e-4 * (func(p0,x,y)**2).sum()) + + def test_func_can_raise(self): + def func(*args): + raise ValueError('I raised') + + with assert_raises(ValueError, match='I raised'): + optimize.leastsq(func, x0=[0]) + + def test_Dfun_can_raise(self): + def func(x): + return x - np.array([10]) + + def deriv_func(*args): + raise ValueError('I raised') + + with assert_raises(ValueError, match='I raised'): + optimize.leastsq(func, x0=[0], Dfun=deriv_func) + + def test_reentrant_func(self): + def func(*args): + self.test_basic() + return self.residuals(*args) + + p0 = array([0,0,0]) + params_fit, ier = leastsq(func, p0, + args=(self.y_meas, self.x)) + assert_(ier in (1,2,3,4), 'solution not found (ier=%d)' % ier) + # low precision due to random + assert_array_almost_equal(params_fit, self.abc, decimal=2) + + def test_reentrant_Dfun(self): + def deriv_func(*args): + self.test_basic() + return self.residuals_jacobian(*args) + + p0 = array([0,0,0]) + params_fit, 
ier = leastsq(self.residuals, p0, + args=(self.y_meas, self.x), + Dfun=deriv_func) + assert_(ier in (1,2,3,4), 'solution not found (ier=%d)' % ier) + # low precision due to random + assert_array_almost_equal(params_fit, self.abc, decimal=2) + + def test_concurrent_no_gradient(self): + v = sequence_parallel([self.test_basic] * 10) + assert all([result is None for result in v]) + + def test_concurrent_with_gradient(self): + v = sequence_parallel([self.test_basic_with_gradient] * 10) + assert all([result is None for result in v]) + + def test_func_input_output_length_check(self): + + def func(x): + return 2 * (x[0] - 3) ** 2 + 1 + + with assert_raises(TypeError, + match='Improper input: func input vector length N='): + optimize.leastsq(func, x0=[0, 1]) + + +class TestCurveFit: + def setup_method(self): + self.y = array([1.0, 3.2, 9.5, 13.7]) + self.x = array([1.0, 2.0, 3.0, 4.0]) + + def test_one_argument(self): + def func(x,a): + return x**a + popt, pcov = curve_fit(func, self.x, self.y) + assert_(len(popt) == 1) + assert_(pcov.shape == (1,1)) + assert_almost_equal(popt[0], 1.9149, decimal=4) + assert_almost_equal(pcov[0,0], 0.0016, decimal=4) + + # Test if we get the same with full_output. Regression test for #1415. + # Also test if check_finite can be turned off. + res = curve_fit(func, self.x, self.y, + full_output=1, check_finite=False) + (popt2, pcov2, infodict, errmsg, ier) = res + assert_array_almost_equal(popt, popt2) + + def test_two_argument(self): + def func(x, a, b): + return b*x**a + popt, pcov = curve_fit(func, self.x, self.y) + assert_(len(popt) == 2) + assert_(pcov.shape == (2,2)) + assert_array_almost_equal(popt, [1.7989, 1.1642], decimal=4) + assert_array_almost_equal(pcov, [[0.0852, -0.1260], [-0.1260, 0.1912]], + decimal=4) + + def test_func_is_classmethod(self): + class test_self: + """This class tests if curve_fit passes the correct number of + arguments when the model function is a class instance method. + """ + + def func(self, x, a, b): + return b * x**a + + test_self_inst = test_self() + popt, pcov = curve_fit(test_self_inst.func, self.x, self.y) + assert_(pcov.shape == (2,2)) + assert_array_almost_equal(popt, [1.7989, 1.1642], decimal=4) + assert_array_almost_equal(pcov, [[0.0852, -0.1260], [-0.1260, 0.1912]], + decimal=4) + + def test_regression_2639(self): + # This test fails if epsfcn in leastsq is too large. 
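+ # (the Gaussian peaks are narrow relative to x ~ 574, so the + # finite-difference step for the numerical Jacobian must stay small)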
+ x = [574.14200000000005, 574.154, 574.16499999999996, + 574.17700000000002, 574.18799999999999, 574.19899999999996, + 574.21100000000001, 574.22199999999998, 574.23400000000004, + 574.245] + y = [859.0, 997.0, 1699.0, 2604.0, 2013.0, 1964.0, 2435.0, + 1550.0, 949.0, 841.0] + guess = [574.1861428571428, 574.2155714285715, 1302.0, 1302.0, + 0.0035019999999983615, 859.0] + good = [5.74177150e+02, 5.74209188e+02, 1.74187044e+03, 1.58646166e+03, + 1.0068462e-02, 8.57450661e+02] + + def f_double_gauss(x, x0, x1, A0, A1, sigma, c): + return (A0*np.exp(-(x-x0)**2/(2.*sigma**2)) + + A1*np.exp(-(x-x1)**2/(2.*sigma**2)) + c) + popt, pcov = curve_fit(f_double_gauss, x, y, guess, maxfev=10000) + assert_allclose(popt, good, rtol=1e-5) + + def test_pcov(self): + xdata = np.array([0, 1, 2, 3, 4, 5]) + ydata = np.array([1, 1, 5, 7, 8, 12]) + sigma = np.array([1, 2, 1, 2, 1, 2]) + + def f(x, a, b): + return a*x + b + + for method in ['lm', 'trf', 'dogbox']: + popt, pcov = curve_fit(f, xdata, ydata, p0=[2, 0], sigma=sigma, + method=method) + perr_scaled = np.sqrt(np.diag(pcov)) + assert_allclose(perr_scaled, [0.20659803, 0.57204404], rtol=1e-3) + + popt, pcov = curve_fit(f, xdata, ydata, p0=[2, 0], sigma=3*sigma, + method=method) + perr_scaled = np.sqrt(np.diag(pcov)) + assert_allclose(perr_scaled, [0.20659803, 0.57204404], rtol=1e-3) + + popt, pcov = curve_fit(f, xdata, ydata, p0=[2, 0], sigma=sigma, + absolute_sigma=True, method=method) + perr = np.sqrt(np.diag(pcov)) + assert_allclose(perr, [0.30714756, 0.85045308], rtol=1e-3) + + popt, pcov = curve_fit(f, xdata, ydata, p0=[2, 0], sigma=3*sigma, + absolute_sigma=True, method=method) + perr = np.sqrt(np.diag(pcov)) + assert_allclose(perr, [3*0.30714756, 3*0.85045308], rtol=1e-3) + + # infinite variances + + def f_flat(x, a, b): + return a*x + + pcov_expected = np.array([np.inf]*4).reshape(2, 2) + + with suppress_warnings() as sup: + sup.filter(OptimizeWarning, + "Covariance of the parameters could not be estimated") + popt, pcov = curve_fit(f_flat, xdata, ydata, p0=[2, 0], sigma=sigma) + popt1, pcov1 = curve_fit(f, xdata[:2], ydata[:2], p0=[2, 0]) + + assert_(pcov.shape == (2, 2)) + assert_array_equal(pcov, pcov_expected) + + assert_(pcov1.shape == (2, 2)) + assert_array_equal(pcov1, pcov_expected) + + def test_array_like(self): + # Test sequence input. Regression test for gh-3037. 
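+ # plain Python lists must be accepted in place of ndarrays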
+ def f_linear(x, a, b): + return a*x + b + + x = [1, 2, 3, 4] + y = [3, 5, 7, 9] + assert_allclose(curve_fit(f_linear, x, y)[0], [2, 1], atol=1e-10) + + def test_indeterminate_covariance(self): + # Test that a warning is returned when pcov is indeterminate + xdata = np.array([1, 2, 3, 4, 5, 6]) + ydata = np.array([1, 2, 3, 4, 5.5, 6]) + assert_warns(OptimizeWarning, curve_fit, + lambda x, a, b: a*x, xdata, ydata) + + def test_NaN_handling(self): + # Test for correct handling of NaNs in input data: gh-3422 + + # create input with NaNs + xdata = np.array([1, np.nan, 3]) + ydata = np.array([1, 2, 3]) + + assert_raises(ValueError, curve_fit, + lambda x, a, b: a*x + b, xdata, ydata) + assert_raises(ValueError, curve_fit, + lambda x, a, b: a*x + b, ydata, xdata) + + assert_raises(ValueError, curve_fit, lambda x, a, b: a*x + b, + xdata, ydata, **{"check_finite": True}) + + @staticmethod + def _check_nan_policy(f, xdata_with_nan, xdata_without_nan, + ydata_with_nan, ydata_without_nan, method): + kwargs = {'f': f, 'xdata': xdata_with_nan, 'ydata': ydata_with_nan, + 'method': method, 'check_finite': False} + # propagate test + error_msg = ("`nan_policy='propagate'` is not supported " + "by this function.") + with assert_raises(ValueError, match=error_msg): + curve_fit(**kwargs, nan_policy="propagate", maxfev=2000) + + # raise test + with assert_raises(ValueError, match="The input contains nan"): + curve_fit(**kwargs, nan_policy="raise") + + # omit test + result_with_nan, _ = curve_fit(**kwargs, nan_policy="omit") + kwargs['xdata'] = xdata_without_nan + kwargs['ydata'] = ydata_without_nan + result_without_nan, _ = curve_fit(**kwargs) + assert_allclose(result_with_nan, result_without_nan) + + # not valid policy test + # check for argument names in any order + error_msg = (r"nan_policy must be one of \{(?:'raise'|'omit'|None)" + r"(?:, ?(?:'raise'|'omit'|None))*\}") + with assert_raises(ValueError, match=error_msg): + curve_fit(**kwargs, nan_policy="hi") + + @pytest.mark.parametrize('method', ["lm", "trf", "dogbox"]) + def test_nan_policy_1d(self, method): + def f(x, a, b): + return a*x + b + + xdata_with_nan = np.array([2, 3, np.nan, 4, 4, np.nan]) + ydata_with_nan = np.array([1, 2, 5, 3, np.nan, 7]) + xdata_without_nan = np.array([2, 3, 4]) + ydata_without_nan = np.array([1, 2, 3]) + + self._check_nan_policy(f, xdata_with_nan, xdata_without_nan, + ydata_with_nan, ydata_without_nan, method) + + @pytest.mark.parametrize('method', ["lm", "trf", "dogbox"]) + def test_nan_policy_2d(self, method): + def f(x, a, b): + x1 = x[0, :] + x2 = x[1, :] + return a*x1 + b + x2 + + xdata_with_nan = np.array([[2, 3, np.nan, 4, 4, np.nan, 5], + [2, 3, np.nan, np.nan, 4, np.nan, 7]]) + ydata_with_nan = np.array([1, 2, 5, 3, np.nan, 7, 10]) + xdata_without_nan = np.array([[2, 3, 5], [2, 3, 7]]) + ydata_without_nan = np.array([1, 2, 10]) + + self._check_nan_policy(f, xdata_with_nan, xdata_without_nan, + ydata_with_nan, ydata_without_nan, method) + + @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize('method', ["lm", "trf", "dogbox"]) + def test_nan_policy_2_3d(self, n, method): + def f(x, a, b): + x1 = x[..., 0, :].squeeze() + x2 = x[..., 1, :].squeeze() + return a*x1 + b + x2 + + xdata_with_nan = np.array([[[2, 3, np.nan, 4, 4, np.nan, 5], + [2, 3, np.nan, np.nan, 4, np.nan, 7]]]) + xdata_with_nan = xdata_with_nan.squeeze() if n == 2 else xdata_with_nan + ydata_with_nan = np.array([1, 2, 5, 3, np.nan, 7, 10]) + xdata_without_nan = np.array([[[2, 3, 5], [2, 3, 7]]]) + ydata_without_nan = np.array([1, 2, 
10])
+
+ self._check_nan_policy(f, xdata_with_nan, xdata_without_nan, + ydata_with_nan, ydata_without_nan, method)
+
+ def test_empty_inputs(self):
+ # Test both with and without bounds (regression test for gh-9864)
+ assert_raises(ValueError, curve_fit, lambda x, a: a*x, [], [])
+ assert_raises(ValueError, curve_fit, lambda x, a: a*x, [], [], + bounds=(1, 2))
+ assert_raises(ValueError, curve_fit, lambda x, a: a*x, [1], [])
+ assert_raises(ValueError, curve_fit, lambda x, a: a*x, [2], [], + bounds=(1, 2))
+
+ def test_function_zero_params(self):
+ # The function takes no fit parameters, so curve_fit raises "Unable to determine number of fit parameters."
+ assert_raises(ValueError, curve_fit, lambda x: x, [1, 2], [3, 4])
+
+ def test_None_x(self): # Added in GH10196
+ popt, pcov = curve_fit(lambda _, a: a * np.arange(10), + None, 2 * np.arange(10))
+ assert_allclose(popt, [2.])
+
+ def test_method_argument(self):
+ def f(x, a, b):
+ return a * np.exp(-b*x)
+
+ xdata = np.linspace(0, 1, 11)
+ ydata = f(xdata, 2., 2.)
+
+ for method in ['trf', 'dogbox', 'lm', None]:
+ popt, pcov = curve_fit(f, xdata, ydata, method=method)
+ assert_allclose(popt, [2., 2.])
+
+ assert_raises(ValueError, curve_fit, f, xdata, ydata, method='unknown')
+
+ def test_full_output(self):
+ def f(x, a, b):
+ return a * np.exp(-b * x)
+
+ xdata = np.linspace(0, 1, 11)
+ ydata = f(xdata, 2., 2.)
+
+ for method in ['trf', 'dogbox', 'lm', None]:
+ popt, pcov, infodict, errmsg, ier = curve_fit( + f, xdata, ydata, method=method, full_output=True)
+ assert_allclose(popt, [2., 2.])
+ assert "nfev" in infodict
+ assert "fvec" in infodict
+ if method == 'lm' or method is None:
+ assert "fjac" in infodict
+ assert "ipvt" in infodict
+ assert "qtf" in infodict
+ assert isinstance(errmsg, str)
+ assert ier in (1, 2, 3, 4)
+
+ def test_bounds(self):
+ def f(x, a, b):
+ return a * np.exp(-b*x)
+
+ xdata = np.linspace(0, 1, 11)
+ ydata = f(xdata, 2., 2.)
+
+ # The minimum w/out bounds is at [2., 2.], + # and with bounds it's at [1.5, something].
+ lb = [1., 0]
+ ub = [1.5, 3.]
+
+ # Test that both variants of the bounds yield the same result
+ bounds = (lb, ub)
+ bounds_class = Bounds(lb, ub)
+ for method in [None, 'trf', 'dogbox']:
+ popt, pcov = curve_fit(f, xdata, ydata, bounds=bounds, + method=method)
+ assert_allclose(popt[0], 1.5)
+
+ popt_class, pcov_class = curve_fit(f, xdata, ydata, + bounds=bounds_class, + method=method)
+ assert_allclose(popt_class, popt)
+
+ # With bounds, the starting estimate is feasible.
+ popt, pcov = curve_fit(f, xdata, ydata, method='trf', + bounds=([0., 0], [0.6, np.inf]))
+ assert_allclose(popt[0], 0.6)
+
+ # method='lm' doesn't support bounds.
+ assert_raises(ValueError, curve_fit, f, xdata, ydata, bounds=bounds, + method='lm')
+
+ def test_bounds_p0(self):
+ # This test is for issue #5719. The problem was that an initial guess
+ # was ignored when 'trf' or 'dogbox' methods were invoked.
+ def f(x, a):
+ return np.sin(x + a)
+
+ xdata = np.linspace(-2*np.pi, 2*np.pi, 40)
+ ydata = np.sin(xdata)
+ bounds = (-3 * np.pi, 3 * np.pi)
+ for method in ['trf', 'dogbox']:
+ popt_1, _ = curve_fit(f, xdata, ydata, p0=2.1*np.pi)
+ popt_2, _ = curve_fit(f, xdata, ydata, p0=2.1*np.pi, + bounds=bounds, method=method)
+
+ # If the initial guess is ignored, then popt_2 would be close to 0.
+ assert_allclose(popt_1, popt_2)
+
+ def test_jac(self):
+ # Test that the Jacobian callable is handled correctly and
+ # weighted if sigma is provided.
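+ # For the model below, f(x, a, b) = a*exp(-b*x), the analytic Jacobian
+ # columns are [df/da, df/db] = [exp(-b*x), -a*x*exp(-b*x)], which is what
+ # `jac` returns; with a 1-D sigma, curve_fit scales the residuals and the
+ # Jacobian rows by 1/sigma.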
+ def f(x, a, b):
+ return a * np.exp(-b*x)
+
+ def jac(x, a, b):
+ e = np.exp(-b*x)
+ return np.vstack((e, -a * x * e)).T
+
+ xdata = np.linspace(0, 1, 11)
+ ydata = f(xdata, 2., 2.)
+
+ # Test numerical options for least_squares backend.
+ for method in ['trf', 'dogbox']:
+ for scheme in ['2-point', '3-point', 'cs']:
+ popt, pcov = curve_fit(f, xdata, ydata, jac=scheme, + method=method)
+ assert_allclose(popt, [2, 2])
+
+ # Test the analytic option.
+ for method in ['lm', 'trf', 'dogbox']:
+ popt, pcov = curve_fit(f, xdata, ydata, method=method, jac=jac)
+ assert_allclose(popt, [2, 2])
+
+ # Now add an outlier and provide sigma.
+ ydata[5] = 100
+ sigma = np.ones(xdata.shape[0])
+ sigma[5] = 200
+ for method in ['lm', 'trf', 'dogbox']:
+ popt, pcov = curve_fit(f, xdata, ydata, sigma=sigma, method=method, + jac=jac)
+ # The outlier still influences the optimization somewhat,
+ # so rtol=1e-3 is needed.
+ assert_allclose(popt, [2, 2], rtol=1e-3)
+
+ def test_maxfev_and_bounds(self):
+ # gh-6340: with no bounds, curve_fit accepts parameter maxfev (via leastsq),
+ # but with bounds, the parameter is `max_nfev` (via least_squares)
+ x = np.arange(0, 10)
+ y = 2*x
+ popt1, _ = curve_fit(lambda x, p: p*x, x, y, bounds=(0, 3), maxfev=100)
+ popt2, _ = curve_fit(lambda x, p: p*x, x, y, bounds=(0, 3), max_nfev=100)
+
+ assert_allclose(popt1, 2, atol=1e-14)
+ assert_allclose(popt2, 2, atol=1e-14)
+
+ def test_curvefit_simplecovariance(self):
+
+ def func(x, a, b):
+ return a * np.exp(-b*x)
+
+ def jac(x, a, b):
+ e = np.exp(-b*x)
+ return np.vstack((e, -a * x * e)).T
+
+ np.random.seed(0)
+ xdata = np.linspace(0, 4, 50)
+ y = func(xdata, 2.5, 1.3)
+ ydata = y + 0.2 * np.random.normal(size=len(xdata))
+
+ sigma = np.zeros(len(xdata)) + 0.2
+ covar = np.diag(sigma**2)
+
+ for jac1, jac2 in [(jac, jac), (None, None)]:
+ for absolute_sigma in [False, True]:
+ popt1, pcov1 = curve_fit(func, xdata, ydata, sigma=sigma, + jac=jac1, absolute_sigma=absolute_sigma)
+ popt2, pcov2 = curve_fit(func, xdata, ydata, sigma=covar, + jac=jac2, absolute_sigma=absolute_sigma)
+
+ assert_allclose(popt1, popt2, atol=1e-14)
+ assert_allclose(pcov1, pcov2, atol=1e-14)
+
+ def test_curvefit_covariance(self):
+
+ def funcp(x, a, b):
+ rotn = np.array([[1./np.sqrt(2), -1./np.sqrt(2), 0], + [1./np.sqrt(2), 1./np.sqrt(2), 0], + [0, 0, 1.0]])
+ return rotn.dot(a * np.exp(-b*x))
+
+ def jacp(x, a, b):
+ rotn = np.array([[1./np.sqrt(2), -1./np.sqrt(2), 0], + [1./np.sqrt(2), 1./np.sqrt(2), 0], + [0, 0, 1.0]])
+ e = np.exp(-b*x)
+ return rotn.dot(np.vstack((e, -a * x * e)).T)
+
+ def func(x, a, b):
+ return a * np.exp(-b*x)
+
+ def jac(x, a, b):
+ e = np.exp(-b*x)
+ return np.vstack((e, -a * x * e)).T
+
+ np.random.seed(0)
+ xdata = np.arange(1, 4)
+ y = func(xdata, 2.5, 1.0)
+ ydata = y + 0.2 * np.random.normal(size=len(xdata))
+ sigma = np.zeros(len(xdata)) + 0.2
+ covar = np.diag(sigma**2)
+ # Get a rotation matrix, and obtain ydatap = R ydata
+ # Chisq = ydata^T C^{-1} ydata
+ # = ydata^T R^T R C^{-1} R^T R ydata
+ # = ydatap^T Cp^{-1} ydatap
+ # Cp^{-1} = R C^{-1} R^T
+ # Cp = R C R^T, since R^-1 = R^T
+ rotn = np.array([[1./np.sqrt(2), -1./np.sqrt(2), 0], + [1./np.sqrt(2), 1./np.sqrt(2), 0], + [0, 0, 1.0]])
+ ydatap = rotn.dot(ydata)
+ covarp = rotn.dot(covar).dot(rotn.T)
+
+ for jac1, jac2 in [(jac, jacp), (None, None)]:
+ for absolute_sigma in [False, True]:
+ popt1, pcov1 = curve_fit(func, xdata, ydata, sigma=sigma, + jac=jac1, absolute_sigma=absolute_sigma)
+ popt2, pcov2 = curve_fit(funcp, xdata, ydatap, sigma=covarp, +
jac=jac2, absolute_sigma=absolute_sigma) + + assert_allclose(popt1, popt2, rtol=1.2e-7, atol=1e-14) + assert_allclose(pcov1, pcov2, rtol=1.2e-7, atol=1e-14) + + @pytest.mark.parametrize("absolute_sigma", [False, True]) + def test_curvefit_scalar_sigma(self, absolute_sigma): + def func(x, a, b): + return a * x + b + + x, y = self.x, self.y + _, pcov1 = curve_fit(func, x, y, sigma=2, absolute_sigma=absolute_sigma) + # Explicitly building the sigma 1D array + _, pcov2 = curve_fit( + func, x, y, sigma=np.full_like(y, 2), absolute_sigma=absolute_sigma + ) + assert np.all(pcov1 == pcov2) + + def test_dtypes(self): + # regression test for gh-9581: curve_fit fails if x and y dtypes differ + x = np.arange(-3, 5) + y = 1.5*x + 3.0 + 0.5*np.sin(x) + + def func(x, a, b): + return a*x + b + + for method in ['lm', 'trf', 'dogbox']: + for dtx in [np.float32, np.float64]: + for dty in [np.float32, np.float64]: + x = x.astype(dtx) + y = y.astype(dty) + + with warnings.catch_warnings(): + warnings.simplefilter("error", OptimizeWarning) + p, cov = curve_fit(func, x, y, method=method) + + assert np.isfinite(cov).all() + assert not np.allclose(p, 1) # curve_fit's initial value + + def test_dtypes2(self): + # regression test for gh-7117: curve_fit fails if + # both inputs are float32 + def hyperbola(x, s_1, s_2, o_x, o_y, c): + b_2 = (s_1 + s_2) / 2 + b_1 = (s_2 - s_1) / 2 + return o_y + b_1*(x-o_x) + b_2*np.sqrt((x-o_x)**2 + c**2/4) + + min_fit = np.array([-3.0, 0.0, -2.0, -10.0, 0.0]) + max_fit = np.array([0.0, 3.0, 3.0, 0.0, 10.0]) + guess = np.array([-2.5/3.0, 4/3.0, 1.0, -4.0, 0.5]) + + params = [-2, .4, -1, -5, 9.5] + xdata = np.array([-32, -16, -8, 4, 4, 8, 16, 32]) + ydata = hyperbola(xdata, *params) + + # run optimization twice, with xdata being float32 and float64 + popt_64, _ = curve_fit(f=hyperbola, xdata=xdata, ydata=ydata, p0=guess, + bounds=(min_fit, max_fit)) + + xdata = xdata.astype(np.float32) + ydata = hyperbola(xdata, *params) + + popt_32, _ = curve_fit(f=hyperbola, xdata=xdata, ydata=ydata, p0=guess, + bounds=(min_fit, max_fit)) + + assert_allclose(popt_32, popt_64, atol=2e-5) + + def test_broadcast_y(self): + xdata = np.arange(10) + target = 4.7 * xdata ** 2 + 3.5 * xdata + np.random.rand(len(xdata)) + def fit_func(x, a, b): + return a * x ** 2 + b * x - target + for method in ['lm', 'trf', 'dogbox']: + popt0, pcov0 = curve_fit(fit_func, + xdata=xdata, + ydata=np.zeros_like(xdata), + method=method) + popt1, pcov1 = curve_fit(fit_func, + xdata=xdata, + ydata=0, + method=method) + assert_allclose(pcov0, pcov1) + + def test_args_in_kwargs(self): + # Ensure that `args` cannot be passed as keyword argument to `curve_fit` + + def func(x, a, b): + return a * x + b + + with assert_raises(ValueError): + curve_fit(func, + xdata=[1, 2, 3, 4], + ydata=[5, 9, 13, 17], + p0=[1], + args=(1,)) + + def test_data_point_number_validation(self): + def func(x, a, b, c, d, e): + return a * np.exp(-b * x) + c + d + e + + with assert_raises(TypeError, match="The number of func parameters="): + curve_fit(func, + xdata=[1, 2, 3, 4], + ydata=[5, 9, 13, 17]) + + @pytest.mark.filterwarnings('ignore::RuntimeWarning') + def test_gh4555(self): + # gh-4555 reported that covariance matrices returned by `leastsq` + # can have negative diagonal elements and eigenvalues. (In fact, + # they can also be asymmetric.) This shows up in the output of + # `scipy.optimize.curve_fit`. 
Check that it has been resolved.
+ def f(x, a, b, c, d, e):
+ return a*np.log(x + 1 + b) + c*np.log(x + 1 + d) + e
+
+ rng = np.random.default_rng(408113519974467917)
+ n = 100
+ x = np.arange(n)
+ y = np.linspace(2, 7, n) + rng.random(n)
+ p, cov = optimize.curve_fit(f, x, y, maxfev=100000)
+ assert np.all(np.diag(cov) > 0)
+ eigs = linalg.eigh(cov)[0] # separate line for debugging
+ # some platforms see a small negative eigenvalue
+ assert np.all(eigs > -1e-2)
+ assert_allclose(cov, cov.T)
+
+ def test_gh4555b(self):
+ # check that PR gh-17247 did not significantly change covariance matrix
+ # for simple cases
+ rng = np.random.default_rng(408113519974467917)
+
+ def func(x, a, b, c):
+ return a * np.exp(-b * x) + c
+
+ xdata = np.linspace(0, 4, 50)
+ y = func(xdata, 2.5, 1.3, 0.5)
+ y_noise = 0.2 * rng.normal(size=xdata.size)
+ ydata = y + y_noise
+ _, res = curve_fit(func, xdata, ydata)
+ # reference from commit 1d80a2f254380d2b45733258ca42eb6b55c8755b
+ ref = [[+0.0158972536486215, 0.0069207183284242, -0.0007474400714749], + [+0.0069207183284242, 0.0205057958128679, +0.0053997711275403], + [-0.0007474400714749, 0.0053997711275403, +0.0027833930320877]]
+ # Linux_Python_38_32bit_full fails with default tolerance
+ assert_allclose(res, ref, 2e-7)
+
+ def test_gh13670(self):
+ # gh-13670 reported that `curve_fit` executes callables
+ # with the same values of the parameters at the beginning of
+ # optimization. Check that this has been resolved.
+
+ rng = np.random.default_rng(8250058582555444926)
+ x = np.linspace(0, 3, 101)
+ y = 2 * x + 1 + rng.normal(size=101) * 0.5
+
+ def line(x, *p):
+ assert not np.all(line.last_p == p)
+ line.last_p = p
+ return x * p[0] + p[1]
+
+ def jac(x, *p):
+ assert not np.all(jac.last_p == p)
+ jac.last_p = p
+ return np.array([x, np.ones_like(x)]).T
+
+ line.last_p = None
+ jac.last_p = None
+ p0 = np.array([1.0, 5.0])
+ curve_fit(line, x, y, p0, method='lm', jac=jac)
+
+
+class TestFixedPoint:
+
+ def test_scalar_trivial(self):
+ # f(x) = 2x; fixed point should be x=0
+ def func(x):
+ return 2.0*x
+ x0 = 1.0
+ x = fixed_point(func, x0)
+ assert_almost_equal(x, 0.0)
+
+ def test_scalar_basic1(self):
+ # f(x) = x**2; x0=1.05; fixed point should be x=1
+ def func(x):
+ return x**2
+ x0 = 1.05
+ x = fixed_point(func, x0)
+ assert_almost_equal(x, 1.0)
+
+ def test_scalar_basic2(self):
+ # f(x) = x**0.5; x0=1.05; fixed point should be x=1
+ def func(x):
+ return x**0.5
+ x0 = 1.05
+ x = fixed_point(func, x0)
+ assert_almost_equal(x, 1.0)
+
+ def test_array_trivial(self):
+ def func(x):
+ return 2.0*x
+ x0 = [0.3, 0.15]
+ with np.errstate(all='ignore'):
+ x = fixed_point(func, x0)
+ assert_almost_equal(x, [0.0, 0.0])
+
+ def test_array_basic1(self):
+ # f(x) = c * x**2; fixed point should be x=1/c
+ def func(x, c):
+ return c * x**2
+ c = array([0.75, 1.0, 1.25])
+ x0 = [1.1, 1.15, 0.9]
+ with np.errstate(all='ignore'):
+ x = fixed_point(func, x0, args=(c,))
+ assert_almost_equal(x, 1.0/c)
+
+ def test_array_basic2(self):
+ # f(x) = c * x**0.5; fixed point should be x=c**2
+ def func(x, c):
+ return c * x**0.5
+ c = array([0.75, 1.0, 1.25])
+ x0 = [0.8, 1.1, 1.1]
+ x = fixed_point(func, x0, args=(c,))
+ assert_almost_equal(x, c**2)
+
+ def test_lambertw(self):
+ # python-list/2010-December/594592.html
+ xxroot = fixed_point(lambda xx: np.exp(-2.0*xx)/2.0, 1.0, + args=(), xtol=1e-12, maxiter=500)
+ assert_allclose(xxroot, np.exp(-2.0*xxroot)/2.0)
+ assert_allclose(xxroot, lambertw(1)/2)
+
+ def test_no_acceleration(self):
+ # github issue 5460
+ ks = 2
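+ # The constants below define func(n), whose fixed point is n = m.
+ # method='iteration' selects plain fixed-point iteration rather than the
+ # default 'del2' (Steffensen/Aitken) acceleration; see gh-5460.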
+ kl = 6
+ m = 1.3
+ n0 = 1.001
+ i0 = ((m-1)/m)*(kl/ks/m)**(1/(m-1))
+
+ def func(n):
+ return np.log(kl/ks/n) / np.log(i0*n/(n - 1)) + 1
+
+ n = fixed_point(func, n0, method='iteration')
+ assert_allclose(n, m)
diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nnls.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nnls.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa4956febd84cce145fba488a9b6289726bf3efb
--- /dev/null
+++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nnls.py
@@ -0,0 +1,318 @@
+import numpy as np
+from numpy.testing import assert_allclose
+from pytest import raises as assert_raises
+from scipy.optimize import nnls
+
+
+class TestNNLS:
+ def setup_method(self):
+ self.rng = np.random.default_rng(1685225766635251)
+
+ def test_nnls(self):
+ a = np.arange(25.0).reshape(-1, 5)
+ x = np.arange(5.0)
+ y = a @ x
+ x, res = nnls(a, y)
+ assert res < 1e-7
+ assert np.linalg.norm((a @ x) - y) < 1e-7
+
+ def test_nnls_tall(self):
+ a = self.rng.uniform(low=-10, high=10, size=[50, 10])
+ x = np.abs(self.rng.uniform(low=-2, high=2, size=[10]))
+ x[::2] = 0
+ b = a @ x
+ xact, rnorm = nnls(a, b, atol=500*np.linalg.norm(a, 1)*np.spacing(1.))
+ assert_allclose(xact, x, rtol=0., atol=1e-10)
+ assert rnorm < 1e-12
+
+ def test_nnls_wide(self):
+ # If too wide then the problem becomes too ill-conditioned and starts
+ # emitting warnings, hence the small m, n difference.
+ a = self.rng.uniform(low=-10, high=10, size=[100, 120])
+ x = np.abs(self.rng.uniform(low=-2, high=2, size=[120]))
+ x[::2] = 0
+ b = a @ x
+ xact, rnorm = nnls(a, b, atol=500*np.linalg.norm(a, 1)*np.spacing(1.))
+ assert_allclose(xact, x, rtol=0., atol=1e-10)
+ assert rnorm < 1e-12
+
+ def test_maxiter(self):
+ # test that the maxiter argument does stop iterations
+ a = self.rng.uniform(size=(5, 10))
+ b = self.rng.uniform(size=5)
+ with assert_raises(RuntimeError):
+ nnls(a, b, maxiter=1)
+
+ def test_nnls_inner_loop_case1(self):
+ # See gh-20168
+ n = np.array( + [3, 2, 0, 1, 1, 1, 3, 8, 14, 16, 29, 23, 41, 47, 53, 57, 67, 76, + 103, 89, 97, 94, 85, 95, 78, 78, 78, 77, 73, 50, 50, 56, 68, 98, + 95, 112, 134, 145, 158, 172, 213, 234, 222, 215, 216, 216, 206, + 183, 135, 156, 110, 92, 63, 60, 52, 29, 20, 16, 12, 5, 5, 5, 1, 2, + 3, 0, 2])
+ k = np.array( + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0.7205812007860187, 0., 1.4411624015720375, + 0.7205812007860187, 2.882324803144075, 5.76464960628815, + 5.76464960628815, 12.249880413362318, 15.132205216506394, + 20.176273622008523, 27.382085629868712, 48.27894045266326, + 47.558359251877235, 68.45521407467177, 97.99904330689854, + 108.0871801179028, 135.46926574777152, 140.51333415327366, + 184.4687874012208, 171.49832578707245, 205.36564222401535, + 244.27702706646033, 214.01261663344755, 228.42424064916793, + 232.02714665309804, 205.36564222401535, 172.9394881886445, + 191.67459940908097, 162.1307701768542, 153.48379576742198, + 110.96950492104689, 103.04311171240067, 86.46974409432225, + 60.528820866025576, 43.234872047161126, 23.779179625938617, + 24.499760826724636, 17.29394881886445, 11.5292992125763, + 5.76464960628815, 5.044068405502131, 3.6029060039300935, 0., + 2.882324803144075, 0., 0., 0.])
+ d = np.array( + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0.003889242101538, 0., 0.007606268390096,
0., + 0.025457371599973, 0.036952882091577, 0., 0.08518359183449, + 0.048201126400243, 0.196234990022205, 0.144116240157247, + 0.171145134062442, 0., 0., 0.269555036538714, 0., 0., 0., + 0.010893241091872, 0., 0., 0., 0., 0., 0., 0., 0., + 0.048167058272886, 0.011238724891049, 0., 0., 0.055162603456078, + 0., 0., 0., 0., 0.027753339088588, 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0.]) + # The following code sets up a system of equations such that + # $k_i-p_i*n_i$ is minimized for $p_i$ with weights $n_i$ and + # monotonicity constraints on $p_i$. This translates to a system of + # equations of the form $k_i - (d_1 + ... + d_i) * n_i$ and + # non-negativity constraints on the $d_i$. If $n_i$ is zero the + # system is modified such that $d_i - d_{i+1}$ is then minimized. + N = len(n) + A = np.diag(n) @ np.tril(np.ones((N, N))) + w = n ** 0.5 + + nz = (n == 0).nonzero()[0] + A[nz, nz] = 1 + A[nz, np.minimum(nz + 1, N - 1)] = -1 + w[nz] = 1 + k[nz] = 0 + W = np.diag(w) + + # Small perturbations can already make the infinite loop go away (just + # uncomment the next line) + k = k + 1e-10 * np.random.normal(size=N) + dact, _ = nnls(W @ A, W @ k) + assert_allclose(dact, d, rtol=0., atol=1e-10) + + def test_nnls_inner_loop_case2(self): + # See gh-20168 + n = np.array( + [1, 0, 1, 2, 2, 2, 3, 3, 5, 4, 14, 14, 19, 26, 36, 42, 36, 64, 64, + 64, 81, 85, 85, 95, 95, 95, 75, 76, 69, 81, 62, 59, 68, 64, 71, 67, + 74, 78, 118, 135, 153, 159, 210, 195, 218, 243, 236, 215, 196, 175, + 185, 149, 144, 103, 104, 75, 56, 40, 32, 26, 17, 9, 12, 8, 2, 1, 1, + 1]) + k = np.array( + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0.7064355064917867, 0., 0., 2.11930651947536, + 0.7064355064917867, 0., 3.5321775324589333, 7.064355064917867, + 11.302968103868587, 16.95445215580288, 20.486629688261814, + 20.486629688261814, 37.44108184406469, 55.808405012851146, + 78.41434122058831, 103.13958394780086, 105.965325973768, + 125.74552015553803, 149.057891869767, 176.60887662294667, + 197.09550631120848, 211.930651947536, 204.86629688261814, + 233.8301526487814, 221.1143135319292, 195.6826352982249, + 197.80194181770025, 191.4440222592742, 187.91184472681525, + 144.11284332432447, 131.39700420747232, 116.5618585711448, + 93.24948685691584, 89.01087381796512, 53.68909849337579, + 45.211872415474346, 31.083162285638615, 24.72524272721253, + 16.95445215580288, 9.890097090885014, 9.890097090885014, + 2.8257420259671466, 2.8257420259671466, 1.4128710129835733, + 0.7064355064917867, 1.4128710129835733]) + d = np.array( + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0.0021916146355674473, 0., 0., + 0.011252740799789484, 0., 0., 0.037746623295934395, + 0.03602328132946222, 0.09509167709829734, 0.10505765870204821, + 0.01391037014274718, 0.0188296228752321, 0.20723559202324254, + 0.3056220879462608, 0.13304643490426477, 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0.043185876949706214, 0.0037266261379722554, + 0., 0., 0., 0., 0., 0.094797899357143, 0., 0., 0., 0., 0., 0., 0., + 0., 0.23450935613672663, 0., 0., 0.07064355064917871]) + # The following code sets up a system of equations such that + # $k_i-p_i*n_i$ is minimized for $p_i$ with weights $n_i$ and + # monotonicity constraints on $p_i$. This translates to a system of + # equations of the form $k_i - (d_1 + ... + d_i) * n_i$ and + # non-negativity constraints on the $d_i$. If $n_i$ is zero the + # system is modified such that $d_i - d_{i+1}$ is then minimized. 
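+ # Concretely, row i of A = diag(n) @ tril(ones) holds n_i in columns
+ # 1..i, so (A @ d)_i = n_i * (d_1 + ... + d_i) = n_i * p_i; e.g. for
+ # N = 3, A = [[n1, 0, 0], [n2, n2, 0], [n3, n3, n3]]. The weighted
+ # problem min ||W @ (A @ d - k)|| with d >= 0 is the monotone fit
+ # described above.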
+ N = len(n) + A = np.diag(n) @ np.tril(np.ones((N, N))) + w = n ** 0.5 + + nz = (n == 0).nonzero()[0] + A[nz, nz] = 1 + A[nz, np.minimum(nz + 1, N - 1)] = -1 + w[nz] = 1 + k[nz] = 0 + W = np.diag(w) + + dact, _ = nnls(W @ A, W @ k, atol=1e-7) + + p = np.cumsum(dact) + assert np.all(dact >= 0) + assert np.linalg.norm(k - n * p, ord=np.inf) < 28 + assert_allclose(dact, d, rtol=0., atol=1e-10) + + def test_nnls_gh20302(self): + # See gh-20302 + A = np.array( + [0.33408569134321575, 0.11136189711440525, 0.049140798007949286, + 0.03712063237146841, 0.055680948557202625, 0.16642814595936478, + 0.11095209730624318, 0.09791993030943345, 0.14793612974165757, + 0.44380838922497273, 0.11099502671044059, 0.11099502671044059, + 0.14693672599330593, 0.3329850801313218, 1.498432860590948, + 0.0832374225132955, 0.11098323001772734, 0.19589481249472837, + 0.5919105600945457, 3.5514633605672747, 0.06658716751427037, + 0.11097861252378394, 0.24485832778293645, 0.9248217710315328, + 6.936163282736496, 0.05547609388181014, 0.11095218776362029, + 0.29376003042571264, 1.3314262531634435, 11.982836278470993, + 0.047506113282944136, 0.11084759766020298, 0.3423969672933396, + 1.8105107617833156, 19.010362998724812, 0.041507335004505576, + 0.11068622667868154, 0.39074115283013344, 2.361306169145206, + 28.335674029742474, 0.03682846280947718, 0.11048538842843154, + 0.4387861797121048, 2.9831054875676517, 40.2719240821633, + 0.03311278164362387, 0.11037593881207958, 0.4870572300443105, + 3.6791979604026523, 55.187969406039784, 0.030079304092299915, + 0.11029078167176636, 0.5353496017200152, 4.448394860761242, + 73.3985152025605, 0.02545939709595835, 0.11032405408248619, + 0.6328767609778363, 6.214921713313388, 121.19097340961108, + 0.022080881724881523, 0.11040440862440762, 0.7307742886903428, + 8.28033064683057, 186.30743955368786, 0.020715838214945492, + 0.1104844704797093, 0.7800578384588346, 9.42800814760186, + 226.27219554244465, 0.01843179728340054, 0.11059078370040323, + 0.8784095015912599, 11.94380463964355, 322.48272527037585, + 0.015812787653789077, 0.11068951357652354, 1.0257259848595766, + 16.27135849574896, 512.5477926160922, 0.014438550529330062, + 0.11069555405819713, 1.1234754801775881, 19.519316032262093, + 673.4164031130423, 0.012760770585072577, 0.110593345070629, + 1.2688431112524712, 24.920367089248398, 971.8943164806875, + 0.011427556646114315, 0.11046638091243838, 1.413623342459821, + 30.967408782453557, 1347.0822820367298, 0.010033330264470307, + 0.11036663290917338, 1.6071533470570285, 40.063087746029936, + 1983.122843428482, 0.008950061496507258, 0.11038409179025618, + 1.802244865119193, 50.37194055362024, 2795.642700725923, + 0.008071078821135658, 0.11030474388885401, 1.9956465761433504, + 61.80742482572119, 3801.1566267818534, 0.007191031207777556, + 0.11026247851925586, 2.238160187262168, 77.7718015155818, + 5366.2543045751445, 0.00636834224248, 0.11038459886965334, + 2.5328963107984297, 99.49331844784753, 7760.4788389321075, + 0.005624259098118485, 0.11061042892966355, 2.879742607664547, + 128.34496770138628, 11358.529641572684, 0.0050354270614989555, + 0.11077939535297703, 3.2263279459292575, 160.85168205252265, + 15924.316523199741, 0.0044997853165982555, 0.1109947044760903, + 3.6244287189055613, 202.60233390369015, 22488.859063309606, + 0.004023601950058174, 0.1113196539516095, 4.07713905729421, + 255.6270320242126, 31825.565487014468, 0.0036024117873727094, + 0.111674765408554, 4.582933773135057, 321.9583486728612, + 44913.18963986413, 0.003201503089582304, 0.11205260813538065, + 
5.191786833370116, 411.79333489752383, 64857.45024636, + 0.0028633044552448853, 0.11262330857296549, 5.864295861648949, + 522.7223161899905, 92521.84996562831, 0.0025691897303891965, + 0.11304434813712465, 6.584584405106342, 656.5615739804199, + 129999.19164812315, 0.0022992911894424675, 0.11343169867916175, + 7.4080129906658305, 828.2026426227864, 183860.98666225857, + 0.0020449922071108764, 0.11383789952917212, 8.388975556433872, + 1058.2750599896935, 265097.9025274183, 0.001831274615120854, + 0.11414945100919989, 9.419351803810935, 1330.564050780237, + 373223.2162438565, 0.0016363333454631633, 0.11454333418242145, + 10.6143816579462, 1683.787012481595, 530392.9089317025, + 0.0014598610433380044, 0.11484240207592301, 11.959688127956882, + 2132.0874753402027, 754758.9662704318, 0.0012985240015312626, + 0.11513579480243862, 13.514425358573531, 2715.5160990137824, + 1083490.9235064993, 0.0011614735761289934, 0.11537304189548002, + 15.171418602667567, 3415.195870828736, 1526592.554260445, + 0.0010347472698811352, 0.11554677847006009, 17.080800985009617, + 4322.412404600832, 2172012.2333119176, 0.0009232988811258664, + 0.1157201264344419, 19.20004861829407, 5453.349531598553, + 3075689.135821584, 0.0008228871862975205, 0.11602709326795038, + 21.65735242414206, 6920.203923780365, 4390869.389638642, + 0.00073528900066722, 0.11642075843897651, 24.40223571298994, + 8755.811207598026, 6238515.485413593, 0.0006602764384729194, + 0.11752920604817965, 27.694443541914293, 11171.386093291572, + 8948280.260726549, 0.0005935538977939806, 0.11851292825953147, + 31.325508920763063, 14174.185724149384, 12735505.873148222, + 0.0005310755355633124, 0.11913794514470308, 35.381052949627765, + 17987.010118815077, 18157886.71494382, 0.00047239949671590953, + 0.1190446731724092, 39.71342528048061, 22679.438775422022, + 25718483.571328573, 0.00041829129789387623, 0.11851586773659825, + 44.45299332965028, 28542.57147989741, 36391778.63686921, + 0.00037321512015419886, 0.11880681324908665, 50.0668539579632, + 36118.26128449941, 51739409.29004541, 0.0003315539616702064, + 0.1184752823034871, 56.04387059062639, 45383.29960621684, + 72976345.76679668, 0.00029456064937920213, 0.11831519416731286, + 62.91195073220101, 57265.53993693082, 103507463.43600245, + 0.00026301867496859703, 0.11862142241083726, 70.8217262087034, + 72383.14781936012, 146901598.49939138, 0.00023618734450420032, + 0.11966825454879482, 80.26535457124461, 92160.51176984518, + 210125966.835247, 0.00021165918071578316, 0.12043407382728061, + 90.7169587544247, 116975.56852918258, 299515943.218972, + 0.00018757727511329545, 0.11992440455576689, 101.49899864101785, + 147056.26174166967, 423080865.0307836, 0.00016654469159895833, + 0.11957908856805206, 113.65970431102812, 184937.67016486943, + 597533612.3026931, 0.00014717439179415048, 0.11872067604728138, + 126.77899683346702, 231758.58906776624, 841283678.3159915, + 0.00012868496382376066, 0.1166314722122684, 139.93635237349534, + 287417.30847929465, 1172231492.6328032, 0.00011225559452625302, + 0.11427619522772557, 154.0034283704458, 355281.4912295324, + 1627544511.322488, 9.879511142981067e-05, 0.11295574406808354, + 170.96532050841535, 442971.0111288653, 2279085852.2580123, + 8.71257780313587e-05, 0.11192758284428547, 190.35067416684697, + 554165.2523674504, 3203629323.93623, 7.665069027765277e-05, + 0.11060694607065294, 211.28835951100046, 690933.608546013, + 4486577387.093535, 6.734021094824451e-05, 0.10915848194710433, + 234.24338803525194, 860487.9079859136, 6276829044.8032465, + 
5.9191625040287665e-05, 0.10776821865668373, 259.7454711820425, + 1071699.0387579766, 8780430224.544102, 5.1856803674907676e-05, + 0.10606444911641115, 287.1843540288165, 1331126.3723998806, + 12251687131.5685, 4.503421404759231e-05, 0.10347361247668461, + 314.7338642485931, 1638796.0697522392, 16944331963.203278, + 3.90470387455642e-05, 0.1007804070023012, 344.3427560918527, + 2014064.4865519698, 23392351979.057854, 3.46557661636393e-05, + 0.10046706610839032, 385.56603915081587, 2533036.2523656, + 33044724430.235435, 3.148745865254635e-05, 0.1025441570117926, + 442.09038234164746, 3262712.3882769793, 47815050050.199135, + 2.9790762078715404e-05, 0.1089845379379672, 527.8068231298969, + 4375751.903321453, 72035815708.42941, 2.8772639817606534e-05, + 0.11823636789048445, 643.2048194503195, 5989838.001888927, + 110764084330.93005, 2.7951691815106586e-05, 0.12903432664913705, + 788.5500418523591, 8249371.000613411, 171368308481.2427, + 2.6844392423114212e-05, 0.1392060709754626, 955.6296403631383, + 11230229.319931043, 262063016295.25085, 2.499458273851386e-05, + 0.14559344445184325, 1122.7022399726002, 14820229.698461473, + 388475270970.9214, 2.337386729019776e-05, 0.15294300496886065, + 1324.8158105672455, 19644861.137128454, 578442936182.7473, + 2.0081014872174113e-05, 0.14760215298210377, 1436.2385042492353, + 23923681.729276657, 791311658718.4193, 1.773374462991839e-05, + 0.14642752940923615, 1600.5596278736678, 29949429.82503553, + 1112815989293.9326, 1.5303115839590797e-05, 0.14194150045081785, + 1742.873058605698, 36634451.931305364, 1529085389160.7544, + 1.3148448731163076e-05, 0.13699368732998807, 1889.5284359054356, + 44614279.74469635, 2091762812969.9607, 1.1739194407590062e-05, + 0.13739553134643406, 2128.794599579694, 56462810.11822766, + 2973783283306.8145, 1.0293367506254706e-05, 0.13533033372723272, + 2355.372854690074, 70176508.28667311, 4151852759764.441, + 9.678312586863569e-06, 0.14293577249119244, 2794.531827932675, + 93528671.31952812, 6215821967224.52, -1.174086323572049e-05, + 0.1429501325944908, 3139.4804810720925, 118031680.16618933, + -6466892421886.174, -2.1188265307407812e-05, 0.1477108290912869, + 3644.1133424610953, 153900132.62392554, -4828013117542.036, + -8.614483025123122e-05, 0.16037100755883044, 4444.386620899393, + 210846007.89660168, -1766340937974.433, 4.981445776141726e-05, + 0.16053420251962536, 4997.558254401547, 266327328.4755411, + 3862250287024.725, 1.8500019169456637e-05, 0.15448417164977674, + 5402.289867444643, 323399508.1475582, 12152445411933.408, + -5.647882376069748e-05, 0.1406372975946189, 5524.633133597753, + 371512945.9909363, -4162951345292.1514, 2.8048523486337994e-05, + 0.13183417571186926, 5817.462495763679, 439447252.3728975, + 9294740538175.03]).reshape(89, 5) + b = np.ones(89, dtype=np.float64) + sol, rnorm = nnls(A, b) + assert_allclose(sol, np.array([0.61124315, 8.22262829, 0., 0., 0.])) + assert_allclose(rnorm, 1.0556460808977297) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nonlin.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nonlin.py new file mode 100644 index 0000000000000000000000000000000000000000..d65a86198972df00842455462d5713924b44f182 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_nonlin.py @@ -0,0 +1,534 @@ +""" Unit tests for nonlinear solvers +Author: Ondrej Certik +May 2007 +""" +from numpy.testing import 
assert_
+import pytest
+
+from scipy.optimize import _nonlin as nonlin, root
+from scipy.sparse import csr_array
+from numpy import diag, dot
+from numpy.linalg import inv
+import numpy as np
+import scipy
+
+from .test_minpack import pressure_network
+
+SOLVERS = {'anderson': nonlin.anderson, + 'diagbroyden': nonlin.diagbroyden, + 'linearmixing': nonlin.linearmixing, + 'excitingmixing': nonlin.excitingmixing, + 'broyden1': nonlin.broyden1, + 'broyden2': nonlin.broyden2, + 'krylov': nonlin.newton_krylov}
+MUST_WORK = {'anderson': nonlin.anderson, 'broyden1': nonlin.broyden1, + 'broyden2': nonlin.broyden2, 'krylov': nonlin.newton_krylov}
+
+# ----------------------------------------------------------------------------
+# Test problems
+# ----------------------------------------------------------------------------
+
+
+def F(x):
+ x = np.asarray(x).T
+ d = diag([3, 2, 1.5, 1, 0.5])
+ c = 0.01
+ f = -d @ x - c * float(x.T @ x) * x
+ return f
+
+
+F.xin = [1, 1, 1, 1, 1]
+F.KNOWN_BAD = {}
+F.JAC_KSP_BAD = {}
+F.ROOT_JAC_KSP_BAD = {}
+
+
+def F2(x):
+ return x
+
+
+F2.xin = [1, 2, 3, 4, 5, 6]
+F2.KNOWN_BAD = {'linearmixing': nonlin.linearmixing, + 'excitingmixing': nonlin.excitingmixing}
+F2.JAC_KSP_BAD = {}
+F2.ROOT_JAC_KSP_BAD = {}
+
+
+def F2_lucky(x):
+ return x
+
+
+F2_lucky.xin = [0, 0, 0, 0, 0, 0]
+F2_lucky.KNOWN_BAD = {}
+F2_lucky.JAC_KSP_BAD = {}
+F2_lucky.ROOT_JAC_KSP_BAD = {}
+
+
+def F3(x):
+ A = np.array([[-2, 1, 0.], [1, -2, 1], [0, 1, -2]])
+ b = np.array([1, 2, 3.])
+ return A @ x - b
+
+
+F3.xin = [1, 2, 3]
+F3.KNOWN_BAD = {}
+F3.JAC_KSP_BAD = {}
+F3.ROOT_JAC_KSP_BAD = {}
+
+
+def F4_powell(x):
+ A = 1e4
+ return [A*x[0]*x[1] - 1, np.exp(-x[0]) + np.exp(-x[1]) - (1 + 1/A)]
+
+
+F4_powell.xin = [-1, -2]
+F4_powell.KNOWN_BAD = {'linearmixing': nonlin.linearmixing, + 'excitingmixing': nonlin.excitingmixing, + 'diagbroyden': nonlin.diagbroyden}
+# In the extreme case, the nonlinear problem solved by MINRES and the root
+# problem solved by GMRES/BiCGStab/CGS/MINRES/TFQMR do not converge when
+# using the Krylov method to approximate the Jacobian
+F4_powell.JAC_KSP_BAD = {'minres'}
+F4_powell.ROOT_JAC_KSP_BAD = {'gmres', 'bicgstab', 'cgs', 'minres', 'tfqmr'}
+
+
+def F5(x):
+ return pressure_network(x, 4, np.array([.5, .5, .5, .5]))
+
+
+F5.xin = [2., 0, 2, 0]
+F5.KNOWN_BAD = {'excitingmixing': nonlin.excitingmixing, + 'linearmixing': nonlin.linearmixing, + 'diagbroyden': nonlin.diagbroyden}
+# In the extreme case, when using the Krylov method to approximate the
+# Jacobian, the Jacobian inversion yields a zero vector for the nonlinear
+# problem solved by CGS/MINRES, and the root problem solved by MINRES does
+# not converge
+F5.JAC_KSP_BAD = {'cgs', 'minres'}
+F5.ROOT_JAC_KSP_BAD = {'minres'}
+
+
+def F6(x):
+ x1, x2 = x
+ J0 = np.array([[-4.256, 14.7], + [0.8394989, 0.59964207]])
+ v = np.array([(x1 + 3) * (x2**5 - 7) + 3*6, + np.sin(x2 * np.exp(x1) - 1)])
+ return -np.linalg.solve(J0, v)
+
+
+F6.xin = [-0.5, 1.4]
+F6.KNOWN_BAD = {'excitingmixing': nonlin.excitingmixing, + 'linearmixing': nonlin.linearmixing, + 'diagbroyden': nonlin.diagbroyden}
+F6.JAC_KSP_BAD = {}
+F6.ROOT_JAC_KSP_BAD = {}
+
+
+# ----------------------------------------------------------------------------
+# Tests
+# ----------------------------------------------------------------------------
+
+
+class TestNonlin:
+ """ + Check the Broyden methods for a few test problems.
+
+ broyden1, broyden2, and newton_krylov must succeed for + all functions. Some of the others don't -- tests in KNOWN_BAD are skipped.
+ + """ + + def _check_nonlin_func(self, f, func, f_tol=1e-2): + # Test all methods mentioned in the class `KrylovJacobian` + if func == SOLVERS['krylov']: + for method in ['gmres', 'bicgstab', 'cgs', 'minres', 'tfqmr']: + if method in f.JAC_KSP_BAD: + continue + + x = func(f, f.xin, method=method, line_search=None, + f_tol=f_tol, maxiter=200, verbose=0) + assert_(np.absolute(f(x)).max() < f_tol) + + x = func(f, f.xin, f_tol=f_tol, maxiter=200, verbose=0) + assert_(np.absolute(f(x)).max() < f_tol) + + def _check_root(self, f, method, f_tol=1e-2): + # Test Krylov methods + if method == 'krylov': + for jac_method in ['gmres', 'bicgstab', 'cgs', 'minres', 'tfqmr']: + if jac_method in f.ROOT_JAC_KSP_BAD: + continue + + res = root(f, f.xin, method=method, + options={'ftol': f_tol, 'maxiter': 200, + 'disp': 0, + 'jac_options': {'method': jac_method}}) + assert_(np.absolute(res.fun).max() < f_tol) + + res = root(f, f.xin, method=method, + options={'ftol': f_tol, 'maxiter': 200, 'disp': 0}) + assert_(np.absolute(res.fun).max() < f_tol) + + @pytest.mark.xfail + def _check_func_fail(self, *a, **kw): + pass + + @pytest.mark.filterwarnings('ignore::DeprecationWarning') + def test_problem_nonlin(self): + for f in [F, F2, F2_lucky, F3, F4_powell, F5, F6]: + for func in SOLVERS.values(): + if func in f.KNOWN_BAD.values(): + if func in MUST_WORK.values(): + self._check_func_fail(f, func) + continue + self._check_nonlin_func(f, func) + + @pytest.mark.filterwarnings('ignore::DeprecationWarning') + @pytest.mark.parametrize("method", ['lgmres', 'gmres', 'bicgstab', 'cgs', + 'minres', 'tfqmr']) + def test_tol_norm_called(self, method): + # Check that supplying tol_norm keyword to nonlin_solve works + self._tol_norm_used = False + + def local_norm_func(x): + self._tol_norm_used = True + return np.absolute(x).max() + + nonlin.newton_krylov(F, F.xin, method=method, f_tol=1e-2, + maxiter=200, verbose=0, + tol_norm=local_norm_func) + assert_(self._tol_norm_used) + + @pytest.mark.filterwarnings('ignore::DeprecationWarning') + def test_problem_root(self): + for f in [F, F2, F2_lucky, F3, F4_powell, F5, F6]: + for meth in SOLVERS: + if meth in f.KNOWN_BAD: + if meth in MUST_WORK: + self._check_func_fail(f, meth) + continue + self._check_root(f, meth) + + def test_no_convergence(self): + def wont_converge(x): + return 1e3 + x + + with pytest.raises(scipy.optimize.NoConvergence): + nonlin.newton_krylov(wont_converge, xin=[0], maxiter=1) + + +class TestSecant: + """Check that some Jacobian approximations satisfy the secant condition""" + + xs = [np.array([1., 2., 3., 4., 5.]), + np.array([2., 3., 4., 5., 1.]), + np.array([3., 4., 5., 1., 2.]), + np.array([4., 5., 1., 2., 3.]), + np.array([9., 1., 9., 1., 3.]), + np.array([0., 1., 9., 1., 3.]), + np.array([5., 5., 7., 1., 1.]), + np.array([1., 2., 7., 5., 1.]),] + fs = [x**2 - 1 for x in xs] + + def _check_secant(self, jac_cls, npoints=1, **kw): + """ + Check that the given Jacobian approximation satisfies secant + conditions for last `npoints` points. 
+ """ + jac = jac_cls(**kw) + jac.setup(self.xs[0], self.fs[0], None) + for j, (x, f) in enumerate(zip(self.xs[1:], self.fs[1:])): + jac.update(x, f) + + for k in range(min(npoints, j+1)): + dx = self.xs[j-k+1] - self.xs[j-k] + df = self.fs[j-k+1] - self.fs[j-k] + assert_(np.allclose(dx, jac.solve(df))) + + # Check that the `npoints` secant bound is strict + if j >= npoints: + dx = self.xs[j-npoints+1] - self.xs[j-npoints] + df = self.fs[j-npoints+1] - self.fs[j-npoints] + assert_(not np.allclose(dx, jac.solve(df))) + + def test_broyden1(self): + self._check_secant(nonlin.BroydenFirst) + + def test_broyden2(self): + self._check_secant(nonlin.BroydenSecond) + + def test_broyden1_update(self): + # Check that BroydenFirst update works as for a dense matrix + jac = nonlin.BroydenFirst(alpha=0.1) + jac.setup(self.xs[0], self.fs[0], None) + + B = np.identity(5) * (-1/0.1) + + for last_j, (x, f) in enumerate(zip(self.xs[1:], self.fs[1:])): + df = f - self.fs[last_j] + dx = x - self.xs[last_j] + B += (df - dot(B, dx))[:, None] * dx[None, :] / dot(dx, dx) + jac.update(x, f) + assert_(np.allclose(jac.todense(), B, rtol=1e-10, atol=1e-13)) + + def test_broyden2_update(self): + # Check that BroydenSecond update works as for a dense matrix + jac = nonlin.BroydenSecond(alpha=0.1) + jac.setup(self.xs[0], self.fs[0], None) + + H = np.identity(5) * (-0.1) + + for last_j, (x, f) in enumerate(zip(self.xs[1:], self.fs[1:])): + df = f - self.fs[last_j] + dx = x - self.xs[last_j] + H += (dx - dot(H, df))[:, None] * df[None, :] / dot(df, df) + jac.update(x, f) + assert_(np.allclose(jac.todense(), inv(H), rtol=1e-10, atol=1e-13)) + + def test_anderson(self): + # Anderson mixing (with w0=0) satisfies secant conditions + # for the last M iterates, see [Ey]_ + # + # .. [Ey] V. Eyert, J. Comp. Phys., 124, 271 (1996). 
+ self._check_secant(nonlin.Anderson, M=3, w0=0, npoints=3) + + +class TestLinear: + """Solve a linear equation; + some methods find the exact solution in a finite number of steps""" + + def _check(self, jac, N, maxiter, complex=False, **kw): + np.random.seed(123) + + A = np.random.randn(N, N) + if complex: + A = A + 1j*np.random.randn(N, N) + b = np.random.randn(N) + if complex: + b = b + 1j*np.random.randn(N) + + def func(x): + return dot(A, x) - b + + sol = nonlin.nonlin_solve(func, np.zeros(N), jac, maxiter=maxiter, + f_tol=1e-6, line_search=None, verbose=0) + assert_(np.allclose(dot(A, sol), b, atol=1e-6)) + + def test_broyden1(self): + # Broyden methods solve linear systems exactly in 2*N steps + self._check(nonlin.BroydenFirst(alpha=1.0), 20, 41, False) + self._check(nonlin.BroydenFirst(alpha=1.0), 20, 41, True) + + def test_broyden2(self): + # Broyden methods solve linear systems exactly in 2*N steps + self._check(nonlin.BroydenSecond(alpha=1.0), 20, 41, False) + self._check(nonlin.BroydenSecond(alpha=1.0), 20, 41, True) + + def test_anderson(self): + # Anderson is rather similar to Broyden, if given enough storage space + self._check(nonlin.Anderson(M=50, alpha=1.0), 20, 29, False) + self._check(nonlin.Anderson(M=50, alpha=1.0), 20, 29, True) + + def test_krylov(self): + # Krylov methods solve linear systems exactly in N inner steps + self._check(nonlin.KrylovJacobian, 20, 2, False, inner_m=10) + self._check(nonlin.KrylovJacobian, 20, 2, True, inner_m=10) + + def _check_autojac(self, A, b): + def func(x): + return A.dot(x) - b + + def jac(v): + return A + + sol = nonlin.nonlin_solve(func, np.zeros(b.shape[0]), jac, maxiter=2, + f_tol=1e-6, line_search=None, verbose=0) + np.testing.assert_allclose(A @ sol, b, atol=1e-6) + # test jac input as array -- not a function + sol = nonlin.nonlin_solve(func, np.zeros(b.shape[0]), A, maxiter=2, + f_tol=1e-6, line_search=None, verbose=0) + np.testing.assert_allclose(A @ sol, b, atol=1e-6) + + def test_jac_sparse(self): + A = csr_array([[1, 2], [2, 1]]) + b = np.array([1, -1]) + self._check_autojac(A, b) + self._check_autojac((1 + 2j) * A, (2 + 2j) * b) + + def test_jac_ndarray(self): + A = np.array([[1, 2], [2, 1]]) + b = np.array([1, -1]) + self._check_autojac(A, b) + self._check_autojac((1 + 2j) * A, (2 + 2j) * b) + + +class TestJacobianDotSolve: + """ + Check that solve/dot methods in Jacobian approximations are consistent + """ + + def _func(self, x): + return x**2 - 1 + np.dot(self.A, x) + + def _check_dot(self, jac_cls, complex=False, tol=1e-6, **kw): + np.random.seed(123) + + N = 7 + + def rand(*a): + q = np.random.rand(*a) + if complex: + q = q + 1j*np.random.rand(*a) + return q + + def assert_close(a, b, msg): + d = abs(a - b).max() + f = tol + abs(b).max()*tol + if d > f: + raise AssertionError(f'{msg}: err {d:g}') + + self.A = rand(N, N) + + # initialize + x0 = np.random.rand(N) + jac = jac_cls(**kw) + jac.setup(x0, self._func(x0), self._func) + + # check consistency + for k in range(2*N): + v = rand(N) + + if hasattr(jac, '__array__'): + Jd = np.array(jac) + if hasattr(jac, 'solve'): + Gv = jac.solve(v) + Gv2 = np.linalg.solve(Jd, v) + assert_close(Gv, Gv2, 'solve vs array') + if hasattr(jac, 'rsolve'): + Gv = jac.rsolve(v) + Gv2 = np.linalg.solve(Jd.T.conj(), v) + assert_close(Gv, Gv2, 'rsolve vs array') + if hasattr(jac, 'matvec'): + Jv = jac.matvec(v) + Jv2 = np.dot(Jd, v) + assert_close(Jv, Jv2, 'dot vs array') + if hasattr(jac, 'rmatvec'): + Jv = jac.rmatvec(v) + Jv2 = np.dot(Jd.T.conj(), v) + assert_close(Jv, Jv2, 'rmatvec vs 
array') + + if hasattr(jac, 'matvec') and hasattr(jac, 'solve'): + Jv = jac.matvec(v) + Jv2 = jac.solve(jac.matvec(Jv)) + assert_close(Jv, Jv2, 'dot vs solve') + + if hasattr(jac, 'rmatvec') and hasattr(jac, 'rsolve'): + Jv = jac.rmatvec(v) + Jv2 = jac.rmatvec(jac.rsolve(Jv)) + assert_close(Jv, Jv2, 'rmatvec vs rsolve') + + x = rand(N) + jac.update(x, self._func(x)) + + def test_broyden1(self): + self._check_dot(nonlin.BroydenFirst, complex=False) + self._check_dot(nonlin.BroydenFirst, complex=True) + + def test_broyden2(self): + self._check_dot(nonlin.BroydenSecond, complex=False) + self._check_dot(nonlin.BroydenSecond, complex=True) + + def test_anderson(self): + self._check_dot(nonlin.Anderson, complex=False) + self._check_dot(nonlin.Anderson, complex=True) + + def test_diagbroyden(self): + self._check_dot(nonlin.DiagBroyden, complex=False) + self._check_dot(nonlin.DiagBroyden, complex=True) + + def test_linearmixing(self): + self._check_dot(nonlin.LinearMixing, complex=False) + self._check_dot(nonlin.LinearMixing, complex=True) + + def test_excitingmixing(self): + self._check_dot(nonlin.ExcitingMixing, complex=False) + self._check_dot(nonlin.ExcitingMixing, complex=True) + + def test_krylov(self): + self._check_dot(nonlin.KrylovJacobian, complex=False, tol=1e-3) + self._check_dot(nonlin.KrylovJacobian, complex=True, tol=1e-3) + + +class TestNonlinOldTests: + """ Test case for a simple constrained entropy maximization problem + (the machine translation example of Berger et al in + Computational Linguistics, vol 22, num 1, pp 39--72, 1996.) + """ + + def test_broyden1(self): + x = nonlin.broyden1(F, F.xin, iter=12, alpha=1) + assert_(nonlin.norm(x) < 1e-9) + assert_(nonlin.norm(F(x)) < 1e-9) + + def test_broyden2(self): + x = nonlin.broyden2(F, F.xin, iter=12, alpha=1) + assert_(nonlin.norm(x) < 1e-9) + assert_(nonlin.norm(F(x)) < 1e-9) + + def test_anderson(self): + x = nonlin.anderson(F, F.xin, iter=12, alpha=0.03, M=5) + assert_(nonlin.norm(x) < 0.33) + + def test_linearmixing(self): + x = nonlin.linearmixing(F, F.xin, iter=60, alpha=0.5) + assert_(nonlin.norm(x) < 1e-7) + assert_(nonlin.norm(F(x)) < 1e-7) + + def test_exciting(self): + x = nonlin.excitingmixing(F, F.xin, iter=20, alpha=0.5) + assert_(nonlin.norm(x) < 1e-5) + assert_(nonlin.norm(F(x)) < 1e-5) + + def test_diagbroyden(self): + x = nonlin.diagbroyden(F, F.xin, iter=11, alpha=1) + assert_(nonlin.norm(x) < 1e-8) + assert_(nonlin.norm(F(x)) < 1e-8) + + def test_root_broyden1(self): + res = root(F, F.xin, method='broyden1', + options={'nit': 12, 'jac_options': {'alpha': 1}}) + assert_(nonlin.norm(res.x) < 1e-9) + assert_(nonlin.norm(res.fun) < 1e-9) + + def test_root_broyden2(self): + res = root(F, F.xin, method='broyden2', + options={'nit': 12, 'jac_options': {'alpha': 1}}) + assert_(nonlin.norm(res.x) < 1e-9) + assert_(nonlin.norm(res.fun) < 1e-9) + + def test_root_anderson(self): + res = root(F, F.xin, method='anderson', + options={'nit': 12, + 'jac_options': {'alpha': 0.03, 'M': 5}}) + assert_(nonlin.norm(res.x) < 0.33) + + def test_root_linearmixing(self): + res = root(F, F.xin, method='linearmixing', + options={'nit': 60, + 'jac_options': {'alpha': 0.5}}) + assert_(nonlin.norm(res.x) < 1e-7) + assert_(nonlin.norm(res.fun) < 1e-7) + + def test_root_excitingmixing(self): + res = root(F, F.xin, method='excitingmixing', + options={'nit': 20, + 'jac_options': {'alpha': 0.5}}) + assert_(nonlin.norm(res.x) < 1e-5) + assert_(nonlin.norm(res.fun) < 1e-5) + + def test_root_diagbroyden(self): + res = root(F, F.xin, 
method='diagbroyden', + options={'nit': 11, + 'jac_options': {'alpha': 1}}) + assert_(nonlin.norm(res.x) < 1e-8) + assert_(nonlin.norm(res.fun) < 1e-8) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_optimize.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_optimize.py new file mode 100644 index 0000000000000000000000000000000000000000..86c6ab268ee46fe248c101a095a19df70e766bcb --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_optimize.py @@ -0,0 +1,3197 @@ +""" +Unit tests for optimization routines from optimize.py + +Authors: + Ed Schofield, Nov 2005 + Andrew Straw, April 2008 + +To run it in its simplest form:: + nosetests test_optimize.py + +""" +import itertools +import platform +import numpy as np +from numpy.testing import (assert_allclose, assert_equal, + assert_almost_equal, + assert_no_warnings, assert_warns, + assert_array_less, suppress_warnings) +import pytest +from pytest import raises as assert_raises + +from scipy import optimize +from scipy.optimize._minimize import Bounds, NonlinearConstraint +from scipy.optimize._minimize import (MINIMIZE_METHODS, + MINIMIZE_METHODS_NEW_CB, + MINIMIZE_SCALAR_METHODS) +from scipy.optimize._linprog import LINPROG_METHODS +from scipy.optimize._root import ROOT_METHODS +from scipy.optimize._root_scalar import ROOT_SCALAR_METHODS +from scipy.optimize._qap import QUADRATIC_ASSIGNMENT_METHODS +from scipy.optimize._differentiable_functions import ScalarFunction, FD_METHODS +from scipy.optimize._optimize import MemoizeJac, show_options, OptimizeResult +from scipy.optimize import rosen, rosen_der, rosen_hess + +from scipy.sparse import (coo_matrix, csc_matrix, csr_matrix, coo_array, + csr_array, csc_array) + +def test_check_grad(): + # Verify if check_grad is able to estimate the derivative of the + # expit (logistic sigmoid) function. + + def expit(x): + return 1 / (1 + np.exp(-x)) + + def der_expit(x): + return np.exp(-x) / (1 + np.exp(-x))**2 + + x0 = np.array([1.5]) + + r = optimize.check_grad(expit, der_expit, x0) + assert_almost_equal(r, 0) + r = optimize.check_grad(expit, der_expit, x0, + direction='random', seed=1234) + assert_almost_equal(r, 0) + + r = optimize.check_grad(expit, der_expit, x0, epsilon=1e-6) + assert_almost_equal(r, 0) + r = optimize.check_grad(expit, der_expit, x0, epsilon=1e-6, + direction='random', seed=1234) + assert_almost_equal(r, 0) + + # Check if the epsilon parameter is being considered. 
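+ # A coarse step (epsilon=1e-1) degrades the finite-difference estimate,
+ # so the reported gradient error should rise well above the ~1e-7 level
+ # achieved with the default and 1e-6 steps.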
+ r = abs(optimize.check_grad(expit, der_expit, x0, epsilon=1e-1) - 0) + assert r > 1e-7 + r = abs(optimize.check_grad(expit, der_expit, x0, epsilon=1e-1, + direction='random', seed=1234) - 0) + assert r > 1e-7 + + def x_sinx(x): + return (x*np.sin(x)).sum() + + def der_x_sinx(x): + return np.sin(x) + x*np.cos(x) + + x0 = np.arange(0, 2, 0.2) + + r = optimize.check_grad(x_sinx, der_x_sinx, x0, + direction='random', seed=1234) + assert_almost_equal(r, 0) + + assert_raises(ValueError, optimize.check_grad, + x_sinx, der_x_sinx, x0, + direction='random_projection', seed=1234) + + # checking can be done for derivatives of vector valued functions + r = optimize.check_grad(himmelblau_grad, himmelblau_hess, himmelblau_x0, + direction='all', seed=1234) + assert r < 5e-7 + + +class CheckOptimize: + """ Base test case for a simple constrained entropy maximization problem + (the machine translation example of Berger et al in + Computational Linguistics, vol 22, num 1, pp 39--72, 1996.) + """ + + def setup_method(self): + self.F = np.array([[1, 1, 1], + [1, 1, 0], + [1, 0, 1], + [1, 0, 0], + [1, 0, 0]]) + self.K = np.array([1., 0.3, 0.5]) + self.startparams = np.zeros(3, np.float64) + self.solution = np.array([0., -0.524869316, 0.487525860]) + self.maxiter = 1000 + self.funccalls = 0 + self.gradcalls = 0 + self.trace = [] + + def func(self, x): + self.funccalls += 1 + if self.funccalls > 6000: + raise RuntimeError("too many iterations in optimization routine") + log_pdot = np.dot(self.F, x) + logZ = np.log(sum(np.exp(log_pdot))) + f = logZ - np.dot(self.K, x) + self.trace.append(np.copy(x)) + return f + + def grad(self, x): + self.gradcalls += 1 + log_pdot = np.dot(self.F, x) + logZ = np.log(sum(np.exp(log_pdot))) + p = np.exp(log_pdot - logZ) + return np.dot(self.F.transpose(), p) - self.K + + def hess(self, x): + log_pdot = np.dot(self.F, x) + logZ = np.log(sum(np.exp(log_pdot))) + p = np.exp(log_pdot - logZ) + return np.dot(self.F.T, + np.dot(np.diag(p), self.F - np.dot(self.F.T, p))) + + def hessp(self, x, p): + return np.dot(self.hess(x), p) + + +class CheckOptimizeParameterized(CheckOptimize): + + def test_cg(self): + # conjugate gradient optimization routine + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + res = optimize.minimize(self.func, self.startparams, args=(), + method='CG', jac=self.grad, + options=opts) + params, fopt, func_calls, grad_calls, warnflag = \ + res['x'], res['fun'], res['nfev'], res['njev'], res['status'] + else: + retval = optimize.fmin_cg(self.func, self.startparams, + self.grad, (), maxiter=self.maxiter, + full_output=True, disp=self.disp, + retall=False) + (params, fopt, func_calls, grad_calls, warnflag) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls == 9, self.funccalls + assert self.gradcalls == 7, self.gradcalls + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[2:4], + [[0, -0.5, 0.5], + [0, -5.05700028e-01, 4.95985862e-01]], + atol=1e-14, rtol=1e-7) + + def test_cg_cornercase(self): + def f(r): + return 2.5 * (1 - np.exp(-1.5*(r - 0.5)))**2 + + # Check several initial guesses. (Too far away from the + # minimum, the function ends up in the flat region of exp.) 
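+ # f attains its minimum, f(0.5) = 0; for starting points well to the
+ # right of 0.5 the exponential saturates and the gradient is almost zero,
+ # which is the flat region the sweep below has to escape from.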
+ for x0 in np.linspace(-0.75, 3, 71): + sol = optimize.minimize(f, [x0], method='CG') + assert sol.success + assert_allclose(sol.x, [0.5], rtol=1e-5) + + def test_bfgs(self): + # Broyden-Fletcher-Goldfarb-Shanno optimization routine + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + res = optimize.minimize(self.func, self.startparams, + jac=self.grad, method='BFGS', args=(), + options=opts) + + params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag = ( + res['x'], res['fun'], res['jac'], res['hess_inv'], + res['nfev'], res['njev'], res['status']) + else: + retval = optimize.fmin_bfgs(self.func, self.startparams, self.grad, + args=(), maxiter=self.maxiter, + full_output=True, disp=self.disp, + retall=False) + (params, fopt, gopt, Hopt, + func_calls, grad_calls, warnflag) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls == 10, self.funccalls + assert self.gradcalls == 8, self.gradcalls + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[6:8], + [[0, -5.25060743e-01, 4.87748473e-01], + [0, -5.24885582e-01, 4.87530347e-01]], + atol=1e-14, rtol=1e-7) + + def test_bfgs_hess_inv0_neg(self): + # Ensure that BFGS does not accept neg. def. initial inverse + # Hessian estimate. + with pytest.raises(ValueError, match="'hess_inv0' matrix isn't " + "positive definite."): + x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) + opts = {'disp': self.disp, 'hess_inv0': -np.eye(5)} + optimize.minimize(optimize.rosen, x0=x0, method='BFGS', args=(), + options=opts) + + def test_bfgs_hess_inv0_semipos(self): + # Ensure that BFGS does not accept semi pos. def. initial inverse + # Hessian estimate. + with pytest.raises(ValueError, match="'hess_inv0' matrix isn't " + "positive definite."): + x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) + hess_inv0 = np.eye(5) + hess_inv0[0, 0] = 0 + opts = {'disp': self.disp, 'hess_inv0': hess_inv0} + optimize.minimize(optimize.rosen, x0=x0, method='BFGS', args=(), + options=opts) + + def test_bfgs_hess_inv0_sanity(self): + # Ensure that BFGS handles `hess_inv0` parameter correctly. + fun = optimize.rosen + x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) + opts = {'disp': self.disp, 'hess_inv0': 1e-2 * np.eye(5)} + res = optimize.minimize(fun, x0=x0, method='BFGS', args=(), + options=opts) + res_true = optimize.minimize(fun, x0=x0, method='BFGS', args=(), + options={'disp': self.disp}) + assert_allclose(res.fun, res_true.fun, atol=1e-6) + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_bfgs_infinite(self): + # Test corner case where -Inf is the minimum. See gh-2019. 
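+ # func(x) = -e**(-x) decreases without bound as x -> -inf, so there is
+ # no finite minimizer; BFGS should stop at a non-finite objective value.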
+ def func(x):
+ return -np.e ** (-x)
+ def fprime(x):
+ return -func(x)
+ x0 = [0]
+ with np.errstate(over='ignore'):
+ if self.use_wrapper:
+ opts = {'disp': self.disp}
+ x = optimize.minimize(func, x0, jac=fprime, method='BFGS', + args=(), options=opts)['x']
+ else:
+ x = optimize.fmin_bfgs(func, x0, fprime, disp=self.disp)
+ assert not np.isfinite(func(x))
+
+ def test_bfgs_xrtol(self):
+ # test for #17345: check the xrtol parameter
+ x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
+ res = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'xrtol': 1e-3})
+ ref = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'gtol': 1e-3})
+ assert res.nit != ref.nit
+
+ def test_bfgs_c1(self):
+ # test for #18977: an insufficiently low value of c1 leads to precision
+ # loss for poor starting parameters
+ x0 = [10.3, 20.7, 10.8, 1.9, -1.2]
+ res_c1_small = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'c1': 1e-8})
+ res_c1_big = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'c1': 1e-1})
+
+ assert res_c1_small.nfev > res_c1_big.nfev
+
+ def test_bfgs_c2(self):
+ # test that modifying the c2 parameter
+ # results in a different number of iterations
+ x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
+ res_default = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'c2': .9})
+ res_mod = optimize.minimize(optimize.rosen, + x0, method='bfgs', options={'c2': 1e-2})
+ assert res_default.nit > res_mod.nit
+
+ @pytest.mark.parametrize(["c1", "c2"], [[0.5, 2], + [-0.1, 0.1], + [0.2, 0.1]])
+ def test_invalid_c1_c2(self, c1, c2):
+ with pytest.raises(ValueError, match="'c1' and 'c2'"):
+ x0 = [10.3, 20.7, 10.8, 1.9, -1.2]
+ optimize.minimize(optimize.rosen, x0, method='cg', + options={'c1': c1, 'c2': c2})
+
+ def test_powell(self):
+ # Powell (direction set) optimization routine
+ if self.use_wrapper:
+ opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False}
+ res = optimize.minimize(self.func, self.startparams, args=(), + method='Powell', options=opts)
+ params, fopt, direc, numiter, func_calls, warnflag = ( + res['x'], res['fun'], res['direc'], res['nit'], + res['nfev'], res['status'])
+ else:
+ retval = optimize.fmin_powell(self.func, self.startparams, + args=(), maxiter=self.maxiter, + full_output=True, disp=self.disp, + retall=False)
+ (params, fopt, direc, numiter, func_calls, warnflag) = retval
+
+ assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6)
+ # params[0] does not affect the objective function
+ assert_allclose(params[1:], self.solution[1:], atol=5e-6)
+
+ # Ensure that function call counts are 'known good'; these are from
+ # SciPy 0.7.0. Don't allow them to increase.
+ #
+ # However, some leeway must be added: the exact evaluation
+ # count is sensitive to numerical error, and floating-point
+ # computations are not bit-for-bit reproducible across
+ # machines; factors such as MKL usage, data alignment,
+ # etc. affect the rounding error.
+ #
+ assert self.funccalls <= 116 + 20, self.funccalls
+ assert self.gradcalls == 0, self.gradcalls
+
+ @pytest.mark.xfail(reason="This part of test_powell fails on some " + "platforms, but the solution returned by powell is " + "still valid.")
+ def test_powell_gh14014(self):
+ # This part of test_powell started failing on some CI platforms;
+ # see gh-14014.
Since the solution is still correct and the comments + # in test_powell suggest that small differences in the bits are known + # to change the "trace" of the solution, it seems safe to xfail to get CI + # green now and investigate later. + + # Powell (direction set) optimization routine + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + res = optimize.minimize(self.func, self.startparams, args=(), + method='Powell', options=opts) + params, fopt, direc, numiter, func_calls, warnflag = ( + res['x'], res['fun'], res['direc'], res['nit'], + res['nfev'], res['status']) + else: + retval = optimize.fmin_powell(self.func, self.startparams, + args=(), maxiter=self.maxiter, + full_output=True, disp=self.disp, + retall=False) + (params, fopt, direc, numiter, func_calls, warnflag) = retval + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[34:39], + [[0.72949016, -0.44156936, 0.47100962], + [0.72949016, -0.44156936, 0.48052496], + [1.45898031, -0.88313872, 0.95153458], + [0.72949016, -0.44156936, 0.47576729], + [1.72949016, -0.44156936, 0.47576729]], + atol=1e-14, rtol=1e-7) + + def test_powell_bounded(self): + # Powell (direction set) optimization routine + # same as test_powell above, but with bounds + bounds = [(-np.pi, np.pi) for _ in self.startparams] + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + res = optimize.minimize(self.func, self.startparams, args=(), + bounds=bounds, + method='Powell', options=opts) + params, func_calls = (res['x'], res['nfev']) + + assert func_calls == self.funccalls + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6, rtol=1e-5) + + # The exact evaluation count is sensitive to numerical error, and + # floating-point computations are not bit-for-bit reproducible + # across machines; factors such as MKL usage and data alignment + # also affect the rounding error. + # It takes 155 calls on my machine, but we can add the same +20 + # margin as is used in `test_powell` + assert self.funccalls <= 155 + 20 + assert self.gradcalls == 0 + + def test_neldermead(self): + # Nelder-Mead simplex algorithm + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + res = optimize.minimize(self.func, self.startparams, args=(), + method='Nelder-mead', options=opts) + params, fopt, numiter, func_calls, warnflag = ( + res['x'], res['fun'], res['nit'], res['nfev'], + res['status']) + else: + retval = optimize.fmin(self.func, self.startparams, + args=(), maxiter=self.maxiter, + full_output=True, disp=self.disp, + retall=False) + (params, fopt, numiter, func_calls, warnflag) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls == 167, self.funccalls + assert self.gradcalls == 0, self.gradcalls + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[76:78], + [[0.1928968, -0.62780447, 0.35166118], + [0.19572515, -0.63648426, 0.35838135]], + atol=1e-14, rtol=1e-7) + + def test_neldermead_initial_simplex(self): + # Nelder-Mead simplex algorithm + simplex = np.zeros((4, 3)) + simplex[...]
= self.startparams + for j in range(3): + simplex[j+1, j] += 0.1 + + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': False, + 'return_all': True, 'initial_simplex': simplex} + res = optimize.minimize(self.func, self.startparams, args=(), + method='Nelder-mead', options=opts) + params, fopt, numiter, func_calls, warnflag = (res['x'], + res['fun'], + res['nit'], + res['nfev'], + res['status']) + assert_allclose(res['allvecs'][0], simplex[0]) + else: + retval = optimize.fmin(self.func, self.startparams, + args=(), maxiter=self.maxiter, + full_output=True, disp=False, retall=False, + initial_simplex=simplex) + + (params, fopt, numiter, func_calls, warnflag) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.17.0. Don't allow them to increase. + assert self.funccalls == 100, self.funccalls + assert self.gradcalls == 0, self.gradcalls + + # Ensure that the function behaves the same; this is from SciPy 0.15.0 + assert_allclose(self.trace[50:52], + [[0.14687474, -0.5103282, 0.48252111], + [0.14474003, -0.5282084, 0.48743951]], + atol=1e-14, rtol=1e-7) + + def test_neldermead_initial_simplex_bad(self): + # Check that it fails with bad simplices + bad_simplices = [] + + simplex = np.zeros((3, 2)) + simplex[...] = self.startparams[:2] + for j in range(2): + simplex[j+1, j] += 0.1 + bad_simplices.append(simplex) + + simplex = np.zeros((3, 3)) + bad_simplices.append(simplex) + + for simplex in bad_simplices: + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': False, + 'return_all': False, 'initial_simplex': simplex} + assert_raises(ValueError, + optimize.minimize, + self.func, + self.startparams, + args=(), + method='Nelder-mead', + options=opts) + else: + assert_raises(ValueError, optimize.fmin, + self.func, self.startparams, + args=(), maxiter=self.maxiter, + full_output=True, disp=False, retall=False, + initial_simplex=simplex) + + def test_neldermead_x0_ub(self): + # checks whether minimisation occurs correctly for entries where + # x0 == ub + # gh19991 + def quad(x): + return np.sum(x**2) + + res = optimize.minimize( + quad, + [1], + bounds=[(0, 1.)], + method='nelder-mead' + ) + assert_allclose(res.x, [0]) + + res = optimize.minimize( + quad, + [1, 2], + bounds=[(0, 1.), (1, 3.)], + method='nelder-mead' + ) + assert_allclose(res.x, [0, 1]) + + def test_ncg_negative_maxiter(self): + # Regression test for gh-8241 + opts = {'maxiter': -1} + result = optimize.minimize(self.func, self.startparams, + method='Newton-CG', jac=self.grad, + args=(), options=opts) + assert result.status == 1 + + def test_ncg_zero_xtol(self): + # Regression test for gh-20214 + def cosine(x): + return np.cos(x[0]) + + def jac(x): + return -np.sin(x[0]) + + x0 = [0.1] + xtol = 0 + result = optimize.minimize(cosine, + x0=x0, + jac=jac, + method="newton-cg", + options=dict(xtol=xtol)) + assert result.status == 0 + assert_almost_equal(result.x[0], np.pi) + + def test_ncg(self): + # line-search Newton conjugate gradient optimization routine + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + retval = optimize.minimize(self.func, self.startparams, + method='Newton-CG', jac=self.grad, + args=(), options=opts)['x'] + else: + retval = optimize.fmin_ncg(self.func, self.startparams, self.grad, + args=(), maxiter=self.maxiter, + full_output=False, disp=self.disp, + retall=False) + + params = retval + + assert_allclose(self.func(params),
self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls == 7, self.funccalls + assert self.gradcalls <= 22, self.gradcalls # 0.13.0 + # assert self.gradcalls <= 18, self.gradcalls # 0.9.0 + # assert self.gradcalls == 18, self.gradcalls # 0.8.0 + # assert self.gradcalls == 22, self.gradcalls # 0.7.0 + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[3:5], + [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01], + [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]], + atol=1e-6, rtol=1e-7) + + def test_ncg_hess(self): + # Newton conjugate gradient with Hessian + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + retval = optimize.minimize(self.func, self.startparams, + method='Newton-CG', jac=self.grad, + hess=self.hess, + args=(), options=opts)['x'] + else: + retval = optimize.fmin_ncg(self.func, self.startparams, self.grad, + fhess=self.hess, + args=(), maxiter=self.maxiter, + full_output=False, disp=self.disp, + retall=False) + + params = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls <= 7, self.funccalls # gh10673 + assert self.gradcalls <= 18, self.gradcalls # 0.9.0 + # assert self.gradcalls == 18, self.gradcalls # 0.8.0 + # assert self.gradcalls == 22, self.gradcalls # 0.7.0 + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[3:5], + [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01], + [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]], + atol=1e-6, rtol=1e-7) + + def test_ncg_hessp(self): + # Newton conjugate gradient with Hessian times a vector p. + if self.use_wrapper: + opts = {'maxiter': self.maxiter, 'disp': self.disp, + 'return_all': False} + retval = optimize.minimize(self.func, self.startparams, + method='Newton-CG', jac=self.grad, + hessp=self.hessp, + args=(), options=opts)['x'] + else: + retval = optimize.fmin_ncg(self.func, self.startparams, self.grad, + fhess_p=self.hessp, + args=(), maxiter=self.maxiter, + full_output=False, disp=self.disp, + retall=False) + + params = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. + assert self.funccalls <= 7, self.funccalls # gh10673 + assert self.gradcalls <= 18, self.gradcalls # 0.9.0 + # assert self.gradcalls == 18, self.gradcalls # 0.8.0 + # assert self.gradcalls == 22, self.gradcalls # 0.7.0 + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + assert_allclose(self.trace[3:5], + [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01], + [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]], + atol=1e-6, rtol=1e-7) + + def test_cobyqa(self): + # COBYQA method. + if self.use_wrapper: + res = optimize.minimize( + self.func, + self.startparams, + method='cobyqa', + options={'maxiter': self.maxiter, 'disp': self.disp}, + ) + assert_allclose(res.fun, self.func(self.solution), atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 1.14.0. Don't allow them to increase. 
The exact evaluation + count is sensitive to numerical error and floating-point + computations are not bit-for-bit reproducible across machines. It + takes 45 calls on my machine, but we can add the same +20 margin + as is used in `test_powell` + assert self.funccalls <= 45 + 20, self.funccalls + + +def test_maxfev_test(): + rng = np.random.default_rng(271707100830272976862395227613146332411) + + def cost(x): + return rng.random(1) * 1000 # never-converging problem + + for imaxfev in [1, 10, 50]: + # "TNC" and "L-BFGS-B" also support max function evaluations, but + # these may violate the limit because of evaluating gradients + # by numerical differentiation. See the discussion in PR #14805. + for method in ['Powell', 'Nelder-Mead']: + result = optimize.minimize(cost, rng.random(10), + method=method, + options={'maxfev': imaxfev}) + assert result["nfev"] == imaxfev + + +def test_wrap_scalar_function_with_validation(): + + def func_(x): + return x + + fcalls, func = optimize._optimize.\ + _wrap_scalar_function_maxfun_validation(func_, np.asarray(1), 5) + + for i in range(5): + func(np.asarray(i)) + assert fcalls[0] == i+1 + + msg = "Too many function calls" + with assert_raises(optimize._optimize._MaxFuncCallError, match=msg): + func(np.asarray(i)) # exceeded maximum function call + + fcalls, func = optimize._optimize.\ + _wrap_scalar_function_maxfun_validation(func_, np.asarray(1), 5) + + msg = "The user-provided objective function must return a scalar value." + with assert_raises(ValueError, match=msg): + func(np.array([1, 1])) + + +def test_obj_func_returns_scalar(): + match = ("The user-provided " + "objective function must " + "return a scalar value.") + with assert_raises(ValueError, match=match): + optimize.minimize(lambda x: x, np.array([1, 1]), method='BFGS') + + +def test_neldermead_iteration_num(): + x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) + res = optimize._minimize._minimize_neldermead(optimize.rosen, x0, + xatol=1e-8) + assert res.nit <= 339 + + +def test_neldermead_respect_fp(): + # Nelder-Mead should respect the fp type of the input and function + x0 = np.array([5.0, 4.0]).astype(np.float32) + def rosen_(x): + assert x.dtype == np.float32 + return optimize.rosen(x) + + optimize.minimize(rosen_, x0, method='Nelder-Mead') + + +def test_neldermead_xatol_fatol(): + # gh4484 + # test that we can call with fatol, xatol specified + def func(x): + return x[0] ** 2 + x[1] ** 2 + + optimize._minimize._minimize_neldermead(func, [1, 1], maxiter=2, + xatol=1e-3, fatol=1e-3) + + +def test_neldermead_adaptive(): + def func(x): + return np.sum(x ** 2) + p0 = [0.15746215, 0.48087031, 0.44519198, 0.4223638, 0.61505159, + 0.32308456, 0.9692297, 0.4471682, 0.77411992, 0.80441652, + 0.35994957, 0.75487856, 0.99973421, 0.65063887, 0.09626474] + + res = optimize.minimize(func, p0, method='Nelder-Mead') + assert_equal(res.success, False) + + res = optimize.minimize(func, p0, method='Nelder-Mead', + options={'adaptive': True}) + assert_equal(res.success, True) + + +def test_bounded_powell_outsidebounds(): + # With the bounded Powell method, if you start outside the bounds the + # final solution should still be within the bounds (provided that the + # user doesn't make a bad choice for the `direc` argument).
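+ # The default `direc` is the set of coordinate axes (full rank), so + # every parameter can be updated and the solver can move back inside + # the feasible box; the second half of this test instead zeroes out + # the first direction vector, which strands x[0] at its start value.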
+ def func(x): + return np.sum(x ** 2) + bounds = (-1, 1), (-1, 1), (-1, 1) + x0 = [-4, .5, -.8] + + # we're starting outside the bounds, so we should get a warning + with assert_warns(optimize.OptimizeWarning): + res = optimize.minimize(func, x0, bounds=bounds, method="Powell") + assert_allclose(res.x, np.array([0.] * len(x0)), atol=1e-6) + assert_equal(res.success, True) + assert_equal(res.status, 0) + + # However, now if we change the `direc` argument such that the + # set of vectors does not span the parameter space, then we may + # not end up back within the bounds. Here we see that the first + # parameter cannot be updated! + direc = [[0, 0, 0], [0, 1, 0], [0, 0, 1]] + # we're starting outside the bounds, so we should get a warning + with assert_warns(optimize.OptimizeWarning): + res = optimize.minimize(func, x0, + bounds=bounds, method="Powell", + options={'direc': direc}) + assert_allclose(res.x, np.array([-4., 0, 0]), atol=1e-6) + assert_equal(res.success, False) + assert_equal(res.status, 4) + + +def test_bounded_powell_vs_powell(): + # here we test an example where the bounded Powell method + # will return a different result than the standard Powell + # method. + + # first we test a simple example where the minimum is at + # the origin and the minimum that is within the bounds is + # larger than the minimum at the origin. + def func(x): + return np.sum(x ** 2) + bounds = (-5, -1), (-10, -0.1), (1, 9.2), (-4, 7.6), (-15.9, -2) + x0 = [-2.1, -5.2, 1.9, 0, -2] + + options = {'ftol': 1e-10, 'xtol': 1e-10} + + res_powell = optimize.minimize(func, x0, method="Powell", options=options) + assert_allclose(res_powell.x, 0., atol=1e-6) + assert_allclose(res_powell.fun, 0., atol=1e-6) + + res_bounded_powell = optimize.minimize(func, x0, options=options, + bounds=bounds, + method="Powell") + p = np.array([-1, -0.1, 1, 0, -2]) + assert_allclose(res_bounded_powell.x, p, atol=1e-6) + assert_allclose(res_bounded_powell.fun, func(p), atol=1e-6) + + # now we test bounded Powell but with a mix of inf bounds. + bounds = (None, -1), (-np.inf, -.1), (1, np.inf), (-4, None), (-15.9, -2) + res_bounded_powell = optimize.minimize(func, x0, options=options, + bounds=bounds, + method="Powell") + p = np.array([-1, -0.1, 1, 0, -2]) + assert_allclose(res_bounded_powell.x, p, atol=1e-6) + assert_allclose(res_bounded_powell.fun, func(p), atol=1e-6) + + # next we test an example where the global minimum is within + # the bounds, but the bounded Powell method performs better + # than the standard Powell method. + def func(x): + t = np.sin(-x[0]) * np.cos(x[1]) * np.sin(-x[0] * x[1]) * np.cos(x[1]) + t -= np.cos(np.sin(x[1] * x[2]) * np.cos(x[2])) + return t**2 + + bounds = [(-2, 5)] * 3 + x0 = [-0.5, -0.5, -0.5] + + res_powell = optimize.minimize(func, x0, method="Powell") + res_bounded_powell = optimize.minimize(func, x0, + bounds=bounds, + method="Powell") + assert_allclose(res_powell.fun, 0.007136253919761627, atol=1e-6) + assert_allclose(res_bounded_powell.fun, 0, atol=1e-6) + + # next we test the previous example where we provide Powell + # with (-inf, inf) bounds, and compare it to providing Powell + # with no bounds. They should end up the same.
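+ # (With every bound infinite there is nothing to transform, so the + # bounded code path should reduce to the ordinary line search; the + # matching nfev asserted below is the evidence for that.)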
+ bounds = [(-np.inf, np.inf)] * 3 + + res_bounded_powell = optimize.minimize(func, x0, + bounds=bounds, + method="Powell") + assert_allclose(res_powell.fun, res_bounded_powell.fun, atol=1e-6) + assert_allclose(res_powell.nfev, res_bounded_powell.nfev, atol=1e-6) + assert_allclose(res_powell.x, res_bounded_powell.x, atol=1e-6) + + # now test when x0 starts outside of the bounds. + x0 = [45.46254415, -26.52351498, 31.74830248] + bounds = [(-2, 5)] * 3 + # we're starting outside the bounds, so we should get a warning + with assert_warns(optimize.OptimizeWarning): + res_bounded_powell = optimize.minimize(func, x0, + bounds=bounds, + method="Powell") + assert_allclose(res_bounded_powell.fun, 0, atol=1e-6) + + +def test_onesided_bounded_powell_stability(): + # When the Powell method is bounded on only one side, a + # np.tan transform is done in order to convert it into a + # completely bounded problem. Here we do some simple tests + # of one-sided bounded Powell where the optimal solutions + # are large to test the stability of the transformation. + kwargs = {'method': 'Powell', + 'bounds': [(-np.inf, 1e6)] * 3, + 'options': {'ftol': 1e-8, 'xtol': 1e-8}} + x0 = [1, 1, 1] + + # df/dx is constant. + def f(x): + return -np.sum(x) + res = optimize.minimize(f, x0, **kwargs) + assert_allclose(res.fun, -3e6, atol=1e-4) + + # df/dx gets smaller and smaller. + def f(x): + return -np.abs(np.sum(x)) ** (0.1) * (1 if np.all(x > 0) else -1) + + res = optimize.minimize(f, x0, **kwargs) + assert_allclose(res.fun, -(3e6) ** (0.1)) + + # df/dx gets larger and larger. + def f(x): + return -np.abs(np.sum(x)) ** 10 * (1 if np.all(x > 0) else -1) + + res = optimize.minimize(f, x0, **kwargs) + assert_allclose(res.fun, -(3e6) ** 10, rtol=1e-7) + + # df/dx gets larger for some of the variables and smaller for others. + def f(x): + t = -np.abs(np.sum(x[:2])) ** 5 - np.abs(np.sum(x[2:])) ** (0.1) + t *= (1 if np.all(x > 0) else -1) + return t + + kwargs['bounds'] = [(-np.inf, 1e3)] * 3 + res = optimize.minimize(f, x0, **kwargs) + assert_allclose(res.fun, -(2e3) ** 5 - (1e6) ** (0.1), rtol=1e-7) + + +class TestOptimizeWrapperDisp(CheckOptimizeParameterized): + use_wrapper = True + disp = True + + +class TestOptimizeWrapperNoDisp(CheckOptimizeParameterized): + use_wrapper = True + disp = False + + +class TestOptimizeNoWrapperDisp(CheckOptimizeParameterized): + use_wrapper = False + disp = True + + +class TestOptimizeNoWrapperNoDisp(CheckOptimizeParameterized): + use_wrapper = False + disp = False + + +class TestOptimizeSimple(CheckOptimize): + + def test_bfgs_nan(self): + # Test corner case where nan is fed to optimizer. See gh-2067. + def func(x): + return x + def fprime(x): + return np.ones_like(x) + x0 = [np.nan] + with np.errstate(over='ignore', invalid='ignore'): + x = optimize.fmin_bfgs(func, x0, fprime, disp=False) + assert np.isnan(func(x)) + + def test_bfgs_nan_return(self): + # Test corner cases where fun returns NaN. See gh-4793. + + # First case: NaN from first call. + def func(x): + return np.nan + with np.errstate(invalid='ignore'): + result = optimize.minimize(func, 0) + + assert np.isnan(result['fun']) + assert result['success'] is False + + # Second case: NaN from second call. + def func(x): + return 0 if x == 0 else np.nan + def fprime(x): + return np.ones_like(x) # Steer away from zero. 
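+ # The gradient steers the iterate away from x == 0, so the second + # objective evaluation returns NaN; minimize is expected to report + # failure rather than iterate on NaN values.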
+ with np.errstate(invalid='ignore'): + result = optimize.minimize(func, 0, jac=fprime) + + assert np.isnan(result['fun']) + assert result['success'] is False + + def test_bfgs_numerical_jacobian(self): + # BFGS with numerical Jacobian and a vector epsilon parameter. + # define the epsilon parameter using a random vector + epsilon = np.sqrt(np.spacing(1.)) * np.random.rand(len(self.solution)) + + params = optimize.fmin_bfgs(self.func, self.startparams, + epsilon=epsilon, args=(), + maxiter=self.maxiter, disp=False) + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + def test_finite_differences_jac(self): + methods = ['BFGS', 'CG', 'TNC'] + jacs = ['2-point', '3-point', None] + for method, jac in itertools.product(methods, jacs): + result = optimize.minimize(self.func, self.startparams, + method=method, jac=jac) + assert_allclose(self.func(result.x), self.func(self.solution), + atol=1e-6) + + def test_finite_differences_hess(self): + # test that all the methods that require hess can use finite-difference + # For Newton-CG, trust-ncg, trust-krylov the FD estimated hessian is + # wrapped in a hessp function + # dogleg, trust-exact actually require true hessians at the moment, so + # they're excluded. + methods = ['trust-constr', 'Newton-CG', 'trust-ncg', 'trust-krylov'] + hesses = FD_METHODS + (optimize.BFGS,) + for method, hess in itertools.product(methods, hesses): + if hess is optimize.BFGS: + hess = hess() + result = optimize.minimize(self.func, self.startparams, + method=method, jac=self.grad, + hess=hess) + assert result.success + + # check that the methods demand some sort of Hessian specification + # Newton-CG creates its own hessp, and trust-constr doesn't need a hess + # specified either + methods = ['trust-ncg', 'trust-krylov', 'dogleg', 'trust-exact'] + for method in methods: + with pytest.raises(ValueError): + optimize.minimize(self.func, self.startparams, + method=method, jac=self.grad, + hess=None) + + def test_bfgs_gh_2169(self): + def f(x): + if x < 0: + return 1.79769313e+308 + else: + return x + 1./x + xs = optimize.fmin_bfgs(f, [10.], disp=False) + assert_allclose(xs, 1.0, rtol=1e-4, atol=1e-4) + + def test_bfgs_double_evaluations(self): + # check BFGS does not evaluate twice in a row at same point + def f(x): + xp = x[0] + assert xp not in seen + seen.add(xp) + return 10*x**2, 20*x + + seen = set() + optimize.minimize(f, -100, method='bfgs', jac=True, tol=1e-7) + + def test_l_bfgs_b(self): + # limited-memory bound-constrained BFGS algorithm + retval = optimize.fmin_l_bfgs_b(self.func, self.startparams, + self.grad, args=(), + maxiter=self.maxiter) + + (params, fopt, d) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + # Ensure that function call counts are 'known good'; these are from + # SciPy 0.7.0. Don't allow them to increase. 
+ assert self.funccalls == 7, self.funccalls + assert self.gradcalls == 5, self.gradcalls + + # Ensure that the function behaves the same; this is from SciPy 0.7.0 + # test fixed in gh10673 + assert_allclose(self.trace[3:5], + [[8.117083e-16, -5.196198e-01, 4.897617e-01], + [0., -0.52489628, 0.48753042]], + atol=1e-14, rtol=1e-7) + + def test_l_bfgs_b_numjac(self): + # L-BFGS-B with numerical Jacobian + retval = optimize.fmin_l_bfgs_b(self.func, self.startparams, + approx_grad=True, + maxiter=self.maxiter) + + (params, fopt, d) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + def test_l_bfgs_b_funjac(self): + # L-BFGS-B with combined objective function and Jacobian + def fun(x): + return self.func(x), self.grad(x) + + retval = optimize.fmin_l_bfgs_b(fun, self.startparams, + maxiter=self.maxiter) + + (params, fopt, d) = retval + + assert_allclose(self.func(params), self.func(self.solution), + atol=1e-6) + + def test_l_bfgs_b_maxiter(self): + # gh7854 + # Ensure that not more than maxiters are ever run. + class Callback: + def __init__(self): + self.nit = 0 + self.fun = None + self.x = None + + def __call__(self, x): + self.x = x + self.fun = optimize.rosen(x) + self.nit += 1 + + c = Callback() + res = optimize.minimize(optimize.rosen, [0., 0.], method='l-bfgs-b', + callback=c, options={'maxiter': 5}) + + assert_equal(res.nit, 5) + assert_almost_equal(res.x, c.x) + assert_almost_equal(res.fun, c.fun) + assert_equal(res.status, 1) + assert res.success is False + assert_equal(res.message, + 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT') + + def test_minimize_l_bfgs_b(self): + # Minimize with L-BFGS-B method + opts = {'disp': False, 'maxiter': self.maxiter} + r = optimize.minimize(self.func, self.startparams, + method='L-BFGS-B', jac=self.grad, + options=opts) + assert_allclose(self.func(r.x), self.func(self.solution), + atol=1e-6) + assert self.gradcalls == r.njev + + self.funccalls = self.gradcalls = 0 + # approximate jacobian + ra = optimize.minimize(self.func, self.startparams, + method='L-BFGS-B', options=opts) + # check that function evaluations in approximate jacobian are counted + # assert_(ra.nfev > r.nfev) + assert self.funccalls == ra.nfev + assert_allclose(self.func(ra.x), self.func(self.solution), + atol=1e-6) + + self.funccalls = self.gradcalls = 0 + # approximate jacobian + ra = optimize.minimize(self.func, self.startparams, jac='3-point', + method='L-BFGS-B', options=opts) + assert self.funccalls == ra.nfev + assert_allclose(self.func(ra.x), self.func(self.solution), + atol=1e-6) + + def test_minimize_l_bfgs_b_ftol(self): + # Check that the `ftol` parameter in l_bfgs_b works as expected + v0 = None + for tol in [1e-1, 1e-4, 1e-7, 1e-10]: + opts = {'disp': False, 'maxiter': self.maxiter, 'ftol': tol} + sol = optimize.minimize(self.func, self.startparams, + method='L-BFGS-B', jac=self.grad, + options=opts) + v = self.func(sol.x) + + if v0 is None: + v0 = v + else: + assert v < v0 + + assert_allclose(v, self.func(self.solution), rtol=tol) + + def test_minimize_l_bfgs_maxls(self): + # check that the maxls is passed down to the Fortran routine + sol = optimize.minimize(optimize.rosen, np.array([-1.2, 1.0]), + method='L-BFGS-B', jac=optimize.rosen_der, + options={'disp': False, 'maxls': 1}) + assert not sol.success + + def test_minimize_l_bfgs_b_maxfun_interruption(self): + # gh-6162 + f = optimize.rosen + g = optimize.rosen_der + values = [] + x0 = np.full(7, 1000) + + def objfun(x): + value = f(x) + values.append(value) + return value + + # 
Look for an interesting test case. + # Request a maxfun that stops at a particularly bad function + # evaluation somewhere between 100 and 300 evaluations. + low, medium, high = 30, 100, 300 + optimize.fmin_l_bfgs_b(objfun, x0, fprime=g, maxfun=high) + v, k = max((y, i) for i, y in enumerate(values[medium:])) + maxfun = medium + k + # If the minimization strategy is reasonable, + # the minimize() result should not be worse than the best + # of the first 30 function evaluations. + target = min(values[:low]) + xmin, fmin, d = optimize.fmin_l_bfgs_b(f, x0, fprime=g, maxfun=maxfun) + assert_array_less(fmin, target) + + def test_custom(self): + # This function comes from the documentation example. + def custmin(fun, x0, args=(), maxfev=None, stepsize=0.1, + maxiter=100, callback=None, **options): + bestx = x0 + besty = fun(x0) + funcalls = 1 + niter = 0 + improved = True + stop = False + + while improved and not stop and niter < maxiter: + improved = False + niter += 1 + for dim in range(np.size(x0)): + for s in [bestx[dim] - stepsize, bestx[dim] + stepsize]: + testx = np.copy(bestx) + testx[dim] = s + testy = fun(testx, *args) + funcalls += 1 + if testy < besty: + besty = testy + bestx = testx + improved = True + if callback is not None: + callback(bestx) + if maxfev is not None and funcalls >= maxfev: + stop = True + break + + return optimize.OptimizeResult(fun=besty, x=bestx, nit=niter, + nfev=funcalls, success=(niter > 1)) + + x0 = [1.35, 0.9, 0.8, 1.1, 1.2] + res = optimize.minimize(optimize.rosen, x0, method=custmin, + options=dict(stepsize=0.05)) + assert_allclose(res.x, 1.0, rtol=1e-4, atol=1e-4) + + def test_gh10771(self): + # check that minimize passes bounds and constraints to a custom + # minimizer without altering them. + bounds = [(-2, 2), (0, 3)] + constraints = 'constraints' + + def custmin(fun, x0, **options): + assert options['bounds'] is bounds + assert options['constraints'] is constraints + return optimize.OptimizeResult() + + x0 = [1, 1] + optimize.minimize(optimize.rosen, x0, method=custmin, + bounds=bounds, constraints=constraints) + + def test_minimize_tol_parameter(self): + # Check that the minimize() tol= argument does something + def func(z): + x, y = z + return x**2*y**2 + x**4 + 1 + + def dfunc(z): + x, y = z + return np.array([2*x*y**2 + 4*x**3, 2*x**2*y]) + + for method in ['nelder-mead', 'powell', 'cg', 'bfgs', + 'newton-cg', 'l-bfgs-b', 'tnc', + 'cobyla', 'cobyqa', 'slsqp']: + if method in ('nelder-mead', 'powell', 'cobyla', 'cobyqa'): + jac = None + else: + jac = dfunc + + sol1 = optimize.minimize(func, [2, 2], jac=jac, tol=1e-10, + method=method) + sol2 = optimize.minimize(func, [2, 2], jac=jac, tol=1.0, + method=method) + assert func(sol1.x) < func(sol2.x), \ + f"{method}: {func(sol1.x)} vs. 
{func(sol2.x)}" + + @pytest.mark.fail_slow(5) + @pytest.mark.filterwarnings('ignore::UserWarning') + @pytest.mark.filterwarnings('ignore::RuntimeWarning') # See gh-18547 + @pytest.mark.parametrize('method', + ['fmin', 'fmin_powell', 'fmin_cg', 'fmin_bfgs', + 'fmin_ncg', 'fmin_l_bfgs_b', 'fmin_tnc', + 'fmin_slsqp'] + MINIMIZE_METHODS) + def test_minimize_callback_copies_array(self, method): + # Check that arrays passed to callbacks are not modified + # inplace by the optimizer afterward + + if method in ('fmin_tnc', 'fmin_l_bfgs_b'): + def func(x): + return optimize.rosen(x), optimize.rosen_der(x) + else: + func = optimize.rosen + jac = optimize.rosen_der + hess = optimize.rosen_hess + + x0 = np.zeros(10) + + # Set options + kwargs = {} + if method.startswith('fmin'): + routine = getattr(optimize, method) + if method == 'fmin_slsqp': + kwargs['iter'] = 5 + elif method == 'fmin_tnc': + kwargs['maxfun'] = 100 + elif method in ('fmin', 'fmin_powell'): + kwargs['maxiter'] = 3500 + else: + kwargs['maxiter'] = 5 + else: + def routine(*a, **kw): + kw['method'] = method + return optimize.minimize(*a, **kw) + + if method == 'tnc': + kwargs['options'] = dict(maxfun=100) + else: + kwargs['options'] = dict(maxiter=5) + + if method in ('fmin_ncg',): + kwargs['fprime'] = jac + elif method in ('newton-cg',): + kwargs['jac'] = jac + elif method in ('trust-krylov', 'trust-exact', 'trust-ncg', 'dogleg', + 'trust-constr'): + kwargs['jac'] = jac + kwargs['hess'] = hess + + # Run with callback + results = [] + + def callback(x, *args, **kwargs): + assert not isinstance(x, optimize.OptimizeResult) + results.append((x, np.copy(x))) + + routine(func, x0, callback=callback, **kwargs) + + # Check returned arrays coincide with their copies + # and have no memory overlap + assert len(results) > 2 + assert all(np.all(x == y) for x, y in results) + combinations = itertools.combinations(results, 2) + assert not any(np.may_share_memory(x[0], y[0]) for x, y in combinations) + + @pytest.mark.parametrize('method', ['nelder-mead', 'powell', 'cg', + 'bfgs', 'newton-cg', 'l-bfgs-b', + 'tnc', 'cobyla', 'cobyqa', 'slsqp']) + def test_no_increase(self, method): + # Check that the solver doesn't return a value worse than the + # initial point. 
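+ # bad_grad below returns an intentionally wrong (sign-flipped and + # shifted) gradient, so line searches fail almost immediately; a + # well-behaved solver should then fall back to the best point seen + # rather than return something worse than x0.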
+ + def func(x): + return (x - 1)**2 + + def bad_grad(x): + # purposefully invalid gradient function, simulates a case + # where line searches start failing + return 2*(x - 1) * (-1) - 2 + + x0 = np.array([2.0]) + f0 = func(x0) + jac = bad_grad + options = dict(maxfun=20) if method == 'tnc' else dict(maxiter=20) + if method in ['nelder-mead', 'powell', 'cobyla', 'cobyqa']: + jac = None + sol = optimize.minimize(func, x0, jac=jac, method=method, + options=options) + assert_equal(func(sol.x), sol.fun) + + if method == 'slsqp': + pytest.xfail("SLSQP returns slightly worse") + assert func(sol.x) <= f0 + + def test_slsqp_respect_bounds(self): + # Regression test for gh-3108 + def f(x): + return sum((x - np.array([1., 2., 3., 4.]))**2) + + def cons(x): + a = np.array([[-1, -1, -1, -1], [-3, -3, -2, -1]]) + return np.concatenate([np.dot(a, x) + np.array([5, 10]), x]) + + x0 = np.array([0.5, 1., 1.5, 2.]) + res = optimize.minimize(f, x0, method='slsqp', + constraints={'type': 'ineq', 'fun': cons}) + assert_allclose(res.x, np.array([0., 2, 5, 8])/3, atol=1e-12) + + @pytest.mark.parametrize('method', ['Nelder-Mead', 'Powell', 'CG', 'BFGS', + 'Newton-CG', 'L-BFGS-B', 'SLSQP', + 'trust-constr', 'dogleg', 'trust-ncg', + 'trust-exact', 'trust-krylov', + 'cobyqa']) + def test_respect_maxiter(self, method): + # Check that the number of iterations equals MAXITER, assuming + # convergence isn't reached before then + MAXITER = 4 + + x0 = np.zeros(10) + + sf = ScalarFunction(optimize.rosen, x0, (), optimize.rosen_der, + optimize.rosen_hess, None, None) + + # Set options + kwargs = {'method': method, 'options': dict(maxiter=MAXITER)} + + if method in ('Newton-CG',): + kwargs['jac'] = sf.grad + elif method in ('trust-krylov', 'trust-exact', 'trust-ncg', 'dogleg', + 'trust-constr'): + kwargs['jac'] = sf.grad + kwargs['hess'] = sf.hess + + sol = optimize.minimize(sf.fun, x0, **kwargs) + assert sol.nit == MAXITER + assert sol.nfev >= sf.nfev + if hasattr(sol, 'njev'): + assert sol.njev >= sf.ngev + + # method-specific tests + if method == 'SLSQP': + assert sol.status == 9 # Iteration limit reached + elif method == 'cobyqa': + assert sol.status == 6 # Iteration limit reached + + @pytest.mark.parametrize('method', ['Nelder-Mead', 'Powell', + 'fmin', 'fmin_powell']) + def test_runtime_warning(self, method): + x0 = np.zeros(10) + sf = ScalarFunction(optimize.rosen, x0, (), optimize.rosen_der, + optimize.rosen_hess, None, None) + options = {"maxiter": 1, "disp": True} + with pytest.warns(RuntimeWarning, + match=r'Maximum number of iterations'): + if method.startswith('fmin'): + routine = getattr(optimize, method) + routine(sf.fun, x0, **options) + else: + optimize.minimize(sf.fun, x0, method=method, options=options) + + def test_respect_maxiter_trust_constr_ineq_constraints(self): + # special case of minimization with trust-constr and inequality + # constraints to check maxiter limit is obeyed when using internal + # method 'tr_interior_point' + MAXITER = 4 + f = optimize.rosen + jac = optimize.rosen_der + hess = optimize.rosen_hess + + def fun(x): + return np.array([0.2 * x[0] - 0.4 * x[1] - 0.33 * x[2]]) + cons = ({'type': 'ineq', + 'fun': fun},) + + x0 = np.zeros(10) + sol = optimize.minimize(f, x0, constraints=cons, jac=jac, hess=hess, + method='trust-constr', + options=dict(maxiter=MAXITER)) + assert sol.nit == MAXITER + + def test_minimize_automethod(self): + def f(x): + return x**2 + + def cons(x): + return x - 2 + + x0 = np.array([10.]) + sol_0 = optimize.minimize(f, x0) + sol_1 = optimize.minimize(f, x0,
constraints=[{'type': 'ineq', + 'fun': cons}]) + sol_2 = optimize.minimize(f, x0, bounds=[(5, 10)]) + sol_3 = optimize.minimize(f, x0, + constraints=[{'type': 'ineq', 'fun': cons}], + bounds=[(5, 10)]) + sol_4 = optimize.minimize(f, x0, + constraints=[{'type': 'ineq', 'fun': cons}], + bounds=[(1, 10)]) + for sol in [sol_0, sol_1, sol_2, sol_3, sol_4]: + assert sol.success + assert_allclose(sol_0.x, 0, atol=1e-7) + assert_allclose(sol_1.x, 2, atol=1e-7) + assert_allclose(sol_2.x, 5, atol=1e-7) + assert_allclose(sol_3.x, 5, atol=1e-7) + assert_allclose(sol_4.x, 2, atol=1e-7) + + def test_minimize_coerce_args_param(self): + # Regression test for gh-3503 + def Y(x, c): + return np.sum((x-c)**2) + + def dY_dx(x, c=None): + return 2*(x-c) + + c = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]) + xinit = np.random.randn(len(c)) + optimize.minimize(Y, xinit, jac=dY_dx, args=(c), method="BFGS") + + def test_initial_step_scaling(self): + # Check that optimizer initial step is not huge even if the + # function and gradients are + + scales = [1e-50, 1, 1e50] + methods = ['CG', 'BFGS', 'L-BFGS-B', 'Newton-CG'] + + def f(x): + if first_step_size[0] is None and x[0] != x0[0]: + first_step_size[0] = abs(x[0] - x0[0]) + if abs(x).max() > 1e4: + raise AssertionError("Optimization stepped far away!") + return scale*(x[0] - 1)**2 + + def g(x): + return np.array([scale*(x[0] - 1)]) + + for scale, method in itertools.product(scales, methods): + if method in ('CG', 'BFGS'): + options = dict(gtol=scale*1e-8) + else: + options = dict() + + if scale < 1e-10 and method in ('L-BFGS-B', 'Newton-CG'): + # XXX: return initial point if they see small gradient + continue + + x0 = [-1.0] + first_step_size = [None] + res = optimize.minimize(f, x0, jac=g, method=method, + options=options) + + err_msg = f"{method} {scale}: {first_step_size}: {res}" + + assert res.success, err_msg + assert_allclose(res.x, [1.0], err_msg=err_msg) + assert res.nit <= 3, err_msg + + if scale > 1e-10: + if method in ('CG', 'BFGS'): + assert_allclose(first_step_size[0], 1.01, err_msg=err_msg) + else: + # Newton-CG and L-BFGS-B use different logic for the first + # step, but are both scaling invariant with step sizes ~ 1 + assert first_step_size[0] > 0.5 and first_step_size[0] < 3, err_msg + else: + # step size has upper bound of ||grad||, so line + # search makes many small steps + pass + + @pytest.mark.parametrize('method', ['nelder-mead', 'powell', 'cg', 'bfgs', + 'newton-cg', 'l-bfgs-b', 'tnc', + 'cobyla', 'cobyqa', 'slsqp', + 'trust-constr', 'dogleg', 'trust-ncg', + 'trust-exact', 'trust-krylov']) + def test_nan_values(self, method): + # Check that nan values result in a failed exit status + np.random.seed(1234) + + count = [0] + + def func(x): + return np.nan + + def func2(x): + count[0] += 1 + if count[0] > 2: + return np.nan + else: + return np.random.rand() + + def grad(x): + return np.array([1.0]) + + def hess(x): + return np.array([[1.0]]) + + x0 = np.array([1.0]) + + needs_grad = method in ('newton-cg', 'trust-krylov', 'trust-exact', + 'trust-ncg', 'dogleg') + needs_hess = method in ('trust-krylov', 'trust-exact', 'trust-ncg', + 'dogleg') + + funcs = [func, func2] + grads = [grad] if needs_grad else [grad, None] + hesss = [hess] if needs_hess else [hess, None] + options = dict(maxfun=20) if method == 'tnc' else dict(maxiter=20) + + with np.errstate(invalid='ignore'), suppress_warnings() as sup: + sup.filter(UserWarning, "delta_grad == 0.*") + sup.filter(RuntimeWarning, ".*does not use Hessian.*") + sup.filter(RuntimeWarning, ".*does not use
gradient.*") + + for f, g, h in itertools.product(funcs, grads, hesss): + count = [0] + sol = optimize.minimize(f, x0, jac=g, hess=h, method=method, + options=options) + assert_equal(sol.success, False) + + @pytest.mark.parametrize('method', ['nelder-mead', 'cg', 'bfgs', + 'l-bfgs-b', 'tnc', + 'cobyla', 'cobyqa', 'slsqp', + 'trust-constr', 'dogleg', 'trust-ncg', + 'trust-exact', 'trust-krylov']) + def test_duplicate_evaluations(self, method): + # check that there are no duplicate evaluations for any methods + jac = hess = None + if method in ('newton-cg', 'trust-krylov', 'trust-exact', + 'trust-ncg', 'dogleg'): + jac = self.grad + if method in ('trust-krylov', 'trust-exact', 'trust-ncg', + 'dogleg'): + hess = self.hess + + with np.errstate(invalid='ignore'), suppress_warnings() as sup: + # for trust-constr + sup.filter(UserWarning, "delta_grad == 0.*") + optimize.minimize(self.func, self.startparams, + method=method, jac=jac, hess=hess) + + for i in range(1, len(self.trace)): + if np.array_equal(self.trace[i - 1], self.trace[i]): + raise RuntimeError( + f"Duplicate evaluations made by {method}") + + @pytest.mark.filterwarnings('ignore::RuntimeWarning') + @pytest.mark.parametrize('method', MINIMIZE_METHODS_NEW_CB) + @pytest.mark.parametrize('new_cb_interface', [0, 1, 2]) + def test_callback_stopiteration(self, method, new_cb_interface): + # Check that if callback raises StopIteration, optimization + # terminates with the same result as if iterations were limited + + def f(x): + f.flag = False # check that f isn't called after StopIteration + return optimize.rosen(x) + f.flag = False + + def g(x): + f.flag = False + return optimize.rosen_der(x) + + def h(x): + f.flag = False + return optimize.rosen_hess(x) + + maxiter = 5 + + if new_cb_interface == 1: + def callback_interface(*, intermediate_result): + assert intermediate_result.fun == f(intermediate_result.x) + callback() + elif new_cb_interface == 2: + class Callback: + def __call__(self, intermediate_result: OptimizeResult): + assert intermediate_result.fun == f(intermediate_result.x) + callback() + callback_interface = Callback() + else: + def callback_interface(xk, *args): # type: ignore[misc] + callback() + + def callback(): + callback.i += 1 + callback.flag = False + if callback.i == maxiter: + callback.flag = True + raise StopIteration() + callback.i = 0 + callback.flag = False + + kwargs = {'x0': [1.1]*5, 'method': method, + 'fun': f, 'jac': g, 'hess': h} + + res = optimize.minimize(**kwargs, callback=callback_interface) + if method == 'nelder-mead': + maxiter = maxiter + 1 # nelder-mead counts differently + if method == 'cobyqa': + ref = optimize.minimize(**kwargs, options={'maxfev': maxiter}) + assert res.nfev == ref.nfev == maxiter + else: + ref = optimize.minimize(**kwargs, options={'maxiter': maxiter}) + assert res.nit == ref.nit == maxiter + assert res.fun == ref.fun + assert_equal(res.x, ref.x) + assert res.status == (3 if method in [ + 'trust-constr', + 'cobyqa', + ] else 99) + + def test_ndim_error(self): + msg = "'x0' must only have one dimension." + with assert_raises(ValueError, match=msg): + optimize.minimize(lambda x: x, np.ones((2, 1))) + + @pytest.mark.parametrize('method', ('nelder-mead', 'l-bfgs-b', 'tnc', + 'powell', 'cobyla', 'cobyqa', + 'trust-constr')) + def test_minimize_invalid_bounds(self, method): + def f(x): + return np.sum(x**2) + + bounds = Bounds([1, 2], [3, 4]) + msg = 'The number of bounds is not compatible with the length of `x0`.' 
+ with pytest.raises(ValueError, match=msg): + optimize.minimize(f, x0=[1, 2, 3], method=method, bounds=bounds) + + bounds = Bounds([1, 6, 1], [3, 4, 2]) + msg = 'An upper bound is less than the corresponding lower bound.' + with pytest.raises(ValueError, match=msg): + optimize.minimize(f, x0=[1, 2, 3], method=method, bounds=bounds) + + @pytest.mark.parametrize('method', ['bfgs', 'cg', 'newton-cg', 'powell']) + def test_minimize_warnings_gh1953(self, method): + # test that minimize methods produce warnings rather than just using + # `print`; see gh-1953. + kwargs = {} if method=='powell' else {'jac': optimize.rosen_der} + warning_type = (RuntimeWarning if method=='powell' + else optimize.OptimizeWarning) + + options = {'disp': True, 'maxiter': 10} + with pytest.warns(warning_type, match='Maximum number'): + optimize.minimize(lambda x: optimize.rosen(x), [0, 0], + method=method, options=options, **kwargs) + + options['disp'] = False + optimize.minimize(lambda x: optimize.rosen(x), [0, 0], + method=method, options=options, **kwargs) + + +@pytest.mark.parametrize( + 'method', + ['l-bfgs-b', 'tnc', 'Powell', 'Nelder-Mead', 'cobyqa'] +) +def test_minimize_with_scalar(method): + # checks that minimize works with a scalar being provided to it. + def f(x): + return np.sum(x ** 2) + + res = optimize.minimize(f, 17, bounds=[(-100, 100)], method=method) + assert res.success + assert_allclose(res.x, [0.0], atol=1e-5) + + +class TestLBFGSBBounds: + def setup_method(self): + self.bounds = ((1, None), (None, None)) + self.solution = (1, 0) + + def fun(self, x, p=2.0): + return 1.0 / p * (x[0]**p + x[1]**p) + + def jac(self, x, p=2.0): + return x**(p - 1) + + def fj(self, x, p=2.0): + return self.fun(x, p), self.jac(x, p) + + def test_l_bfgs_b_bounds(self): + x, f, d = optimize.fmin_l_bfgs_b(self.fun, [0, -1], + fprime=self.jac, + bounds=self.bounds) + assert d['warnflag'] == 0, d['task'] + assert_allclose(x, self.solution, atol=1e-6) + + def test_l_bfgs_b_funjac(self): + # L-BFGS-B with fun and jac combined and extra arguments + x, f, d = optimize.fmin_l_bfgs_b(self.fj, [0, -1], args=(2.0, ), + bounds=self.bounds) + assert d['warnflag'] == 0, d['task'] + assert_allclose(x, self.solution, atol=1e-6) + + def test_minimize_l_bfgs_b_bounds(self): + # Minimize with method='L-BFGS-B' with bounds + res = optimize.minimize(self.fun, [0, -1], method='L-BFGS-B', + jac=self.jac, bounds=self.bounds) + assert res['success'], res['message'] + assert_allclose(res.x, self.solution, atol=1e-6) + + @pytest.mark.parametrize('bounds', [ + ([(10, 1), (1, 10)]), + ([(1, 10), (10, 1)]), + ([(10, 1), (10, 1)]) + ]) + def test_minimize_l_bfgs_b_incorrect_bounds(self, bounds): + with pytest.raises(ValueError, match='.*bound.*'): + optimize.minimize(self.fun, [0, -1], method='L-BFGS-B', + jac=self.jac, bounds=bounds) + + def test_minimize_l_bfgs_b_bounds_FD(self): + # test that initial starting value outside bounds doesn't raise + # an error (done with clipping). 
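+ # x0 = [0, -1] violates the lower bound of 1 on x[0]; L-BFGS-B clips + # the starting point into the feasible box, and the bounds should + # also be respected by the finite-difference steps so the numerical + # gradient is never evaluated outside them.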
+ # test all different finite differences combos, with and without args + + jacs = ['2-point', '3-point', None] + argss = [(2.,), ()] + for jac, args in itertools.product(jacs, argss): + res = optimize.minimize(self.fun, [0, -1], args=args, + method='L-BFGS-B', + jac=jac, bounds=self.bounds, + options={'finite_diff_rel_step': None}) + assert res['success'], res['message'] + assert_allclose(res.x, self.solution, atol=1e-6) + + +class TestOptimizeScalar: + def setup_method(self): + self.solution = 1.5 + + def fun(self, x, a=1.5): + """Objective function""" + return (x - a)**2 - 0.8 + + def test_brent(self): + x = optimize.brent(self.fun) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.brent(self.fun, brack=(-3, -2)) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.brent(self.fun, full_output=True) + assert_allclose(x[0], self.solution, atol=1e-6) + + x = optimize.brent(self.fun, brack=(-15, -1, 15)) + assert_allclose(x, self.solution, atol=1e-6) + + message = r"\(f\(xb\) < f\(xa\)\) and \(f\(xb\) < f\(xc\)\)" + with pytest.raises(ValueError, match=message): + optimize.brent(self.fun, brack=(-1, 0, 1)) + + message = r"\(xa < xb\) and \(xb < xc\)" + with pytest.raises(ValueError, match=message): + optimize.brent(self.fun, brack=(0, -1, 1)) + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_golden(self): + x = optimize.golden(self.fun) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.golden(self.fun, brack=(-3, -2)) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.golden(self.fun, full_output=True) + assert_allclose(x[0], self.solution, atol=1e-6) + + x = optimize.golden(self.fun, brack=(-15, -1, 15)) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.golden(self.fun, tol=0) + assert_allclose(x, self.solution) + + maxiter_test_cases = [0, 1, 5] + for maxiter in maxiter_test_cases: + x0 = optimize.golden(self.fun, maxiter=0, full_output=True) + x = optimize.golden(self.fun, maxiter=maxiter, full_output=True) + nfev0, nfev = x0[2], x[2] + assert_equal(nfev - nfev0, maxiter) + + message = r"\(f\(xb\) < f\(xa\)\) and \(f\(xb\) < f\(xc\)\)" + with pytest.raises(ValueError, match=message): + optimize.golden(self.fun, brack=(-1, 0, 1)) + + message = r"\(xa < xb\) and \(xb < xc\)" + with pytest.raises(ValueError, match=message): + optimize.golden(self.fun, brack=(0, -1, 1)) + + def test_fminbound(self): + x = optimize.fminbound(self.fun, 0, 1) + assert_allclose(x, 1, atol=1e-4) + + x = optimize.fminbound(self.fun, 1, 5) + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.fminbound(self.fun, np.array([1]), np.array([5])) + assert_allclose(x, self.solution, atol=1e-6) + assert_raises(ValueError, optimize.fminbound, self.fun, 5, 1) + + def test_fminbound_scalar(self): + with pytest.raises(ValueError, match='.*must be finite scalars.*'): + optimize.fminbound(self.fun, np.zeros((1, 2)), 1) + + x = optimize.fminbound(self.fun, 1, np.array(5)) + assert_allclose(x, self.solution, atol=1e-6) + + def test_gh11207(self): + def fun(x): + return x**2 + optimize.fminbound(fun, 0, 0) + + def test_minimize_scalar(self): + # combine all tests above for the minimize_scalar wrapper + x = optimize.minimize_scalar(self.fun).x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, method='Brent') + assert x.success + + x = optimize.minimize_scalar(self.fun, method='Brent', + options=dict(maxiter=3)) + assert not x.success + + x = optimize.minimize_scalar(self.fun, bracket=(-3, -2), + 
args=(1.5, ), method='Brent').x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, method='Brent', + args=(1.5,)).x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, bracket=(-15, -1, 15), + args=(1.5, ), method='Brent').x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, bracket=(-3, -2), + args=(1.5, ), method='golden').x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, method='golden', + args=(1.5,)).x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, bracket=(-15, -1, 15), + args=(1.5, ), method='golden').x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, bounds=(0, 1), args=(1.5,), + method='Bounded').x + assert_allclose(x, 1, atol=1e-4) + + x = optimize.minimize_scalar(self.fun, bounds=(1, 5), args=(1.5, ), + method='bounded').x + assert_allclose(x, self.solution, atol=1e-6) + + x = optimize.minimize_scalar(self.fun, bounds=(np.array([1]), + np.array([5])), + args=(np.array([1.5]), ), + method='bounded').x + assert_allclose(x, self.solution, atol=1e-6) + + assert_raises(ValueError, optimize.minimize_scalar, self.fun, + bounds=(5, 1), method='bounded', args=(1.5, )) + + assert_raises(ValueError, optimize.minimize_scalar, self.fun, + bounds=(np.zeros(2), 1), method='bounded', args=(1.5, )) + + x = optimize.minimize_scalar(self.fun, bounds=(1, np.array(5)), + method='bounded').x + assert_allclose(x, self.solution, atol=1e-6) + + def test_minimize_scalar_custom(self): + # This function comes from the documentation example. + def custmin(fun, bracket, args=(), maxfev=None, stepsize=0.1, + maxiter=100, callback=None, **options): + bestx = (bracket[1] + bracket[0]) / 2.0 + besty = fun(bestx) + funcalls = 1 + niter = 0 + improved = True + stop = False + + while improved and not stop and niter < maxiter: + improved = False + niter += 1 + for testx in [bestx - stepsize, bestx + stepsize]: + testy = fun(testx, *args) + funcalls += 1 + if testy < besty: + besty = testy + bestx = testx + improved = True + if callback is not None: + callback(bestx) + if maxfev is not None and funcalls >= maxfev: + stop = True + break + + return optimize.OptimizeResult(fun=besty, x=bestx, nit=niter, + nfev=funcalls, success=(niter > 1)) + + res = optimize.minimize_scalar(self.fun, bracket=(0, 4), + method=custmin, + options=dict(stepsize=0.05)) + assert_allclose(res.x, self.solution, atol=1e-6) + + def test_minimize_scalar_coerce_args_param(self): + # Regression test for gh-3503 + optimize.minimize_scalar(self.fun, args=1.5) + + @pytest.mark.parametrize('method', ['brent', 'bounded', 'golden']) + def test_disp(self, method): + # test that all minimize_scalar methods accept a disp option. 
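+ # this only checks that each disp level is accepted without raising; + # nothing is asserted about what (if anything) gets printed.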
+ for disp in [0, 1, 2, 3]: + optimize.minimize_scalar(self.fun, options={"disp": disp}) + + @pytest.mark.parametrize('method', ['brent', 'bounded', 'golden']) + def test_result_attributes(self, method): + kwargs = {"bounds": [-10, 10]} if method == 'bounded' else {} + result = optimize.minimize_scalar(self.fun, method=method, **kwargs) + assert hasattr(result, "x") + assert hasattr(result, "success") + assert hasattr(result, "message") + assert hasattr(result, "fun") + assert hasattr(result, "nfev") + assert hasattr(result, "nit") + + @pytest.mark.filterwarnings('ignore::UserWarning') + @pytest.mark.parametrize('method', ['brent', 'bounded', 'golden']) + def test_nan_values(self, method): + # Check that nan values result in a failed exit status + np.random.seed(1234) + + count = [0] + + def func(x): + count[0] += 1 + if count[0] > 4: + return np.nan + else: + return x**2 + 0.1 * np.sin(x) + + bracket = (-1, 0, 1) + bounds = (-1, 1) + + with np.errstate(invalid='ignore'), suppress_warnings() as sup: + sup.filter(UserWarning, "delta_grad == 0.*") + sup.filter(RuntimeWarning, ".*does not use Hessian.*") + sup.filter(RuntimeWarning, ".*does not use gradient.*") + + count = [0] + + kwargs = {"bounds": bounds} if method == 'bounded' else {} + sol = optimize.minimize_scalar(func, bracket=bracket, + **kwargs, method=method, + options=dict(maxiter=20)) + assert_equal(sol.success, False) + + def test_minimize_scalar_defaults_gh10911(self): + # Previously, bounds were silently ignored unless `method='bounded'` + # was chosen. See gh-10911. Check that this is no longer the case. + def f(x): + return x**2 + + res = optimize.minimize_scalar(f) + assert_allclose(res.x, 0, atol=1e-8) + + res = optimize.minimize_scalar(f, bounds=(1, 100), + options={'xatol': 1e-10}) + assert_allclose(res.x, 1) + + def test_minimize_non_finite_bounds_gh10911(self): + # Previously, minimize_scalar misbehaved with infinite bounds. + # See gh-10911. Check that it now raises an error instead. + msg = "Optimization bounds must be finite scalars." + with pytest.raises(ValueError, match=msg): + optimize.minimize_scalar(np.sin, bounds=(1, np.inf)) + with pytest.raises(ValueError, match=msg): + optimize.minimize_scalar(np.sin, bounds=(np.nan, 1)) + + @pytest.mark.parametrize("method", ['brent', 'golden']) + def test_minimize_unbounded_method_with_bounds_gh10911(self, method): + # Previously, `bounds` were silently ignored when `method='brent'` or + # `method='golden'`. See gh-10911. Check that an error is now raised. + msg = "Use of `bounds` is incompatible with..." + with pytest.raises(ValueError, match=msg): + optimize.minimize_scalar(np.sin, method=method, bounds=(1, 2)) + + @pytest.mark.filterwarnings('ignore::RuntimeWarning') + @pytest.mark.parametrize("method", MINIMIZE_SCALAR_METHODS) + @pytest.mark.parametrize("tol", [1, 1e-6]) + @pytest.mark.parametrize("fshape", [(), (1,), (1, 1)]) + def test_minimize_scalar_dimensionality_gh16196(self, method, tol, fshape): + # gh-16196 reported that the output shape of `minimize_scalar` was not + # consistent when an objective function returned an array. Check that + # `res.fun` and `res.x` are now consistent.
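+ # For example, with fshape == (1,) the objective returns shape-(1,) + # arrays, and res.x / res.fun are expected to come back with that + # same shape rather than being squeezed to scalars.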
+ def f(x): + return np.array(x**4).reshape(fshape) + + a, b = -0.1, 0.2 + kwargs = (dict(bracket=(a, b)) if method != "bounded" + else dict(bounds=(a, b))) + kwargs.update(dict(method=method, tol=tol)) + + res = optimize.minimize_scalar(f, **kwargs) + assert res.x.shape == res.fun.shape == f(res.x).shape == fshape + + @pytest.mark.parametrize('method', ['bounded', 'brent', 'golden']) + def test_minimize_scalar_warnings_gh1953(self, method): + # test that minimize_scalar methods produce warnings rather than just + # using `print`; see gh-1953. + def f(x): + return (x - 1)**2 + + kwargs = {} + kwd = 'bounds' if method == 'bounded' else 'bracket' + kwargs[kwd] = [-2, 10] + + options = {'disp': True, 'maxiter': 3} + with pytest.warns(optimize.OptimizeWarning, match='Maximum number'): + optimize.minimize_scalar(f, method=method, options=options, + **kwargs) + + options['disp'] = False + optimize.minimize_scalar(f, method=method, options=options, **kwargs) + + +class TestBracket: + + @pytest.mark.filterwarnings('ignore::RuntimeWarning') + def test_errors_and_status_false(self): + # Check that `bracket` raises the errors it is supposed to + def f(x): # gh-14858 + return x**2 if ((-1 < x) & (x < 1)) else 100.0 + + message = "The algorithm terminated without finding a valid bracket." + with pytest.raises(RuntimeError, match=message): + optimize.bracket(f, -1, 1) + with pytest.raises(RuntimeError, match=message): + optimize.bracket(f, -1, np.inf) + with pytest.raises(RuntimeError, match=message): + optimize.brent(f, brack=(-1, 1)) + with pytest.raises(RuntimeError, match=message): + optimize.golden(f, brack=(-1, 1)) + + def f(x): # gh-5899 + return -5 * x**5 + 4 * x**4 - 12 * x**3 + 11 * x**2 - 2 * x + 1 + + message = "No valid bracket was found before the iteration limit..." + with pytest.raises(RuntimeError, match=message): + optimize.bracket(f, -0.5, 0.5, maxiter=10) + + @pytest.mark.parametrize('method', ('brent', 'golden')) + def test_minimize_scalar_success_false(self, method): + # Check that status information from `bracket` gets to minimize_scalar + def f(x): # gh-14858 + return x**2 if ((-1 < x) & (x < 1)) else 100.0 + + message = "The algorithm terminated without finding a valid bracket." 
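+ # f is a flat plateau of 100.0 everywhere outside (-1, 1), so the + # downhill bracketing search can never find the f(xa) > f(xb) < f(xc) + # pattern it needs; the failure should propagate into the + # OptimizeResult checked below.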
+ + res = optimize.minimize_scalar(f, bracket=(-1, 1), method=method) + assert not res.success + assert message in res.message + assert res.nfev == 3 + assert res.nit == 0 + assert res.fun == 100 + + +def test_brent_negative_tolerance(): + assert_raises(ValueError, optimize.brent, np.cos, tol=-.01) + + +class TestNewtonCg: + def test_rosenbrock(self): + x0 = np.array([-1.2, 1.0]) + sol = optimize.minimize(optimize.rosen, x0, + jac=optimize.rosen_der, + hess=optimize.rosen_hess, + tol=1e-5, + method='Newton-CG') + assert sol.success, sol.message + assert_allclose(sol.x, np.array([1, 1]), rtol=1e-4) + + def test_himmelblau(self): + x0 = np.array(himmelblau_x0) + sol = optimize.minimize(himmelblau, + x0, + jac=himmelblau_grad, + hess=himmelblau_hess, + method='Newton-CG', + tol=1e-6) + assert sol.success, sol.message + assert_allclose(sol.x, himmelblau_xopt, rtol=1e-4) + assert_allclose(sol.fun, himmelblau_min, atol=1e-4) + + def test_finite_difference(self): + x0 = np.array([-1.2, 1.0]) + sol = optimize.minimize(optimize.rosen, x0, + jac=optimize.rosen_der, + hess='2-point', + tol=1e-5, + method='Newton-CG') + assert sol.success, sol.message + assert_allclose(sol.x, np.array([1, 1]), rtol=1e-4) + + def test_hessian_update_strategy(self): + x0 = np.array([-1.2, 1.0]) + sol = optimize.minimize(optimize.rosen, x0, + jac=optimize.rosen_der, + hess=optimize.BFGS(), + tol=1e-5, + method='Newton-CG') + assert sol.success, sol.message + assert_allclose(sol.x, np.array([1, 1]), rtol=1e-4) + + +def test_line_for_search(): + # _line_for_search is only used in _linesearch_powell, which is also + # tested below. Thus there are more tests of _line_for_search in the + # test_linesearch_powell_bounded function. + + line_for_search = optimize._optimize._line_for_search + # args are x0, alpha, lower_bound, upper_bound + # returns lmin, lmax + + lower_bound = np.array([-5.3, -1, -1.5, -3]) + upper_bound = np.array([1.9, 1, 2.8, 3]) + + # test when starting in the bounds + x0 = np.array([0., 0, 0, 0]) + # and when starting outside of the bounds + x1 = np.array([0., 2, -3, 0]) + + all_tests = ( + (x0, np.array([1., 0, 0, 0]), -5.3, 1.9), + (x0, np.array([0., 1, 0, 0]), -1, 1), + (x0, np.array([0., 0, 1, 0]), -1.5, 2.8), + (x0, np.array([0., 0, 0, 1]), -3, 3), + (x0, np.array([1., 1, 0, 0]), -1, 1), + (x0, np.array([1., 0, -1, 2]), -1.5, 1.5), + (x0, np.array([2., 0, -1, 2]), -1.5, 0.95), + (x1, np.array([1., 0, 0, 0]), -5.3, 1.9), + (x1, np.array([0., 1, 0, 0]), -3, -1), + (x1, np.array([0., 0, 1, 0]), 1.5, 5.8), + (x1, np.array([0., 0, 0, 1]), -3, 3), + (x1, np.array([1., 1, 0, 0]), -3, -1), + (x1, np.array([1., 0, -1, 0]), -5.3, -1.5), + ) + + for x, alpha, lmin, lmax in all_tests: + mi, ma = line_for_search(x, alpha, lower_bound, upper_bound) + assert_allclose(mi, lmin, atol=1e-6) + assert_allclose(ma, lmax, atol=1e-6) + + # now with infinite bounds + lower_bound = np.array([-np.inf, -1, -np.inf, -3]) + upper_bound = np.array([np.inf, 1, 2.8, np.inf]) + + all_tests = ( + (x0, np.array([1., 0, 0, 0]), -np.inf, np.inf), + (x0, np.array([0., 1, 0, 0]), -1, 1), + (x0, np.array([0., 0, 1, 0]), -np.inf, 2.8), + (x0, np.array([0., 0, 0, 1]), -3, np.inf), + (x0, np.array([1., 1, 0, 0]), -1, 1), + (x0, np.array([1., 0, -1, 2]), -1.5, np.inf), + (x1, np.array([1., 0, 0, 0]), -np.inf, np.inf), + (x1, np.array([0., 1, 0, 0]), -3, -1), + (x1, np.array([0., 0, 1, 0]), -np.inf, 5.8), + (x1, np.array([0., 0, 0, 1]), -3, np.inf), + (x1, np.array([1., 1, 0, 0]), -3, -1), + (x1, np.array([1., 0, -1, 0]), -5.8, np.inf), + ) + + 
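+    # Worked example for the infinite-bounds data above, assuming the
+    # documented semantics (lmin/lmax are the extreme step lengths l keeping
+    # x + l*alpha within the bounds): for x0 = 0 and alpha = [1, 0, -1, 2],
+    # dim 2 needs -l <= 2.8 (l >= -2.8) and dim 3 needs 2*l >= -3
+    # (l >= -1.5); nothing bounds l from above, so the intersection is
+    # [-1.5, inf), matching the (-1.5, np.inf) entry above.
+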
for x, alpha, lmin, lmax in all_tests: + mi, ma = line_for_search(x, alpha, lower_bound, upper_bound) + assert_allclose(mi, lmin, atol=1e-6) + assert_allclose(ma, lmax, atol=1e-6) + + +def test_linesearch_powell(): + # helper function in optimize.py, not a public function. + linesearch_powell = optimize._optimize._linesearch_powell + # args are func, p, xi, fval, lower_bound=None, upper_bound=None, tol=1e-3 + # returns new_fval, p + direction, direction + def func(x): + return np.sum((x - np.array([-1.0, 2.0, 1.5, -0.4])) ** 2) + p0 = np.array([0., 0, 0, 0]) + fval = func(p0) + lower_bound = np.array([-np.inf] * 4) + upper_bound = np.array([np.inf] * 4) + + all_tests = ( + (np.array([1., 0, 0, 0]), -1), + (np.array([0., 1, 0, 0]), 2), + (np.array([0., 0, 1, 0]), 1.5), + (np.array([0., 0, 0, 1]), -.4), + (np.array([-1., 0, 1, 0]), 1.25), + (np.array([0., 0, 1, 1]), .55), + (np.array([2., 0, -1, 1]), -.65), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, + fval=fval, tol=1e-5) + assert_allclose(f, func(l * xi), atol=1e-6) + assert_allclose(p, l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(l * xi), atol=1e-6) + assert_allclose(p, l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + +def test_linesearch_powell_bounded(): + # helper function in optimize.py, not a public function. + linesearch_powell = optimize._optimize._linesearch_powell + # args are func, p, xi, fval, lower_bound=None, upper_bound=None, tol=1e-3 + # returns new_fval, p+direction, direction + def func(x): + return np.sum((x - np.array([-1.0, 2.0, 1.5, -0.4])) ** 2) + p0 = np.array([0., 0, 0, 0]) + fval = func(p0) + + # first choose bounds such that the same tests from + # test_linesearch_powell should pass. 
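+    # (All unconstrained steps above stay within [-2, 2]; the largest,
+    # l = 2 along the second axis, sits exactly at the upper bound, so the
+    # bounded results should match the unbounded ones.)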
+ lower_bound = np.array([-2.]*4) + upper_bound = np.array([2.]*4) + + all_tests = ( + (np.array([1., 0, 0, 0]), -1), + (np.array([0., 1, 0, 0]), 2), + (np.array([0., 0, 1, 0]), 1.5), + (np.array([0., 0, 0, 1]), -.4), + (np.array([-1., 0, 1, 0]), 1.25), + (np.array([0., 0, 1, 1]), .55), + (np.array([2., 0, -1, 1]), -.65), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(l * xi), atol=1e-6) + assert_allclose(p, l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + # now choose bounds such that unbounded vs bounded gives different results + lower_bound = np.array([-.3]*3 + [-1]) + upper_bound = np.array([.45]*3 + [.9]) + + all_tests = ( + (np.array([1., 0, 0, 0]), -.3), + (np.array([0., 1, 0, 0]), .45), + (np.array([0., 0, 1, 0]), .45), + (np.array([0., 0, 0, 1]), -.4), + (np.array([-1., 0, 1, 0]), .3), + (np.array([0., 0, 1, 1]), .45), + (np.array([2., 0, -1, 1]), -.15), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(l * xi), atol=1e-6) + assert_allclose(p, l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + # now choose as above but start outside the bounds + p0 = np.array([-1., 0, 0, 2]) + fval = func(p0) + + all_tests = ( + (np.array([1., 0, 0, 0]), .7), + (np.array([0., 1, 0, 0]), .45), + (np.array([0., 0, 1, 0]), .45), + (np.array([0., 0, 0, 1]), -2.4), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(p0 + l * xi), atol=1e-6) + assert_allclose(p, p0 + l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + # now mix in inf + p0 = np.array([0., 0, 0, 0]) + fval = func(p0) + + # now choose bounds that mix inf + lower_bound = np.array([-.3, -np.inf, -np.inf, -1]) + upper_bound = np.array([np.inf, .45, np.inf, .9]) + + all_tests = ( + (np.array([1., 0, 0, 0]), -.3), + (np.array([0., 1, 0, 0]), .45), + (np.array([0., 0, 1, 0]), 1.5), + (np.array([0., 0, 0, 1]), -.4), + (np.array([-1., 0, 1, 0]), .3), + (np.array([0., 0, 1, 1]), .55), + (np.array([2., 0, -1, 1]), -.15), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(l * xi), atol=1e-6) + assert_allclose(p, l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + # now choose as above but start outside the bounds + p0 = np.array([-1., 0, 0, 2]) + fval = func(p0) + + all_tests = ( + (np.array([1., 0, 0, 0]), .7), + (np.array([0., 1, 0, 0]), .45), + (np.array([0., 0, 1, 0]), 1.5), + (np.array([0., 0, 0, 1]), -2.4), + ) + + for xi, l in all_tests: + f, p, direction = linesearch_powell(func, p0, xi, tol=1e-5, + lower_bound=lower_bound, + upper_bound=upper_bound, + fval=fval) + assert_allclose(f, func(p0 + l * xi), atol=1e-6) + assert_allclose(p, p0 + l * xi, atol=1e-6) + assert_allclose(direction, l * xi, atol=1e-6) + + +def test_powell_limits(): + # gh15342 - powell was going outside bounds for some function evaluations. 
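+    # The objective below asserts feasibility on every call, so any trial
+    # point outside `bounds` fails the test immediately.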
+ bounds = optimize.Bounds([0, 0], [0.6, 20]) + + def fun(x): + a, b = x + assert (x >= bounds.lb).all() and (x <= bounds.ub).all() + return a ** 2 + b ** 2 + + optimize.minimize(fun, x0=[0.6, 20], method='Powell', bounds=bounds) + + # Another test from the original report - gh-13411 + bounds = optimize.Bounds(lb=[0,], ub=[1,], keep_feasible=[True,]) + + def func(x): + assert x >= 0 and x <= 1 + return np.exp(x) + + optimize.minimize(fun=func, x0=[0.5], method='powell', bounds=bounds) + + +class TestRosen: + + def test_hess(self): + # Compare rosen_hess(x) times p with rosen_hess_prod(x,p). See gh-1775. + x = np.array([3, 4, 5]) + p = np.array([2, 2, 2]) + hp = optimize.rosen_hess_prod(x, p) + dothp = np.dot(optimize.rosen_hess(x), p) + assert_equal(hp, dothp) + + +def himmelblau(p): + """ + R^2 -> R^1 test function for optimization. The function has four local + minima where himmelblau(xopt) == 0. + """ + x, y = p + a = x*x + y - 11 + b = x + y*y - 7 + return a*a + b*b + + +def himmelblau_grad(p): + x, y = p + return np.array([4*x**3 + 4*x*y - 42*x + 2*y**2 - 14, + 2*x**2 + 4*x*y + 4*y**3 - 26*y - 22]) + + +def himmelblau_hess(p): + x, y = p + return np.array([[12*x**2 + 4*y - 42, 4*x + 4*y], + [4*x + 4*y, 4*x + 12*y**2 - 26]]) + + +himmelblau_x0 = [-0.27, -0.9] +himmelblau_xopt = [3, 2] +himmelblau_min = 0.0 + + +def test_minimize_multiple_constraints(): + # Regression test for gh-4240. + def func(x): + return np.array([25 - 0.2 * x[0] - 0.4 * x[1] - 0.33 * x[2]]) + + def func1(x): + return np.array([x[1]]) + + def func2(x): + return np.array([x[2]]) + + cons = ({'type': 'ineq', 'fun': func}, + {'type': 'ineq', 'fun': func1}, + {'type': 'ineq', 'fun': func2}) + + def f(x): + return -1 * (x[0] + x[1] + x[2]) + + res = optimize.minimize(f, [0, 0, 0], method='SLSQP', constraints=cons) + assert_allclose(res.x, [125, 0, 0], atol=1e-10) + + +class TestOptimizeResultAttributes: + # Test that all minimizers return an OptimizeResult containing + # all the OptimizeResult attributes + def setup_method(self): + self.x0 = [5, 5] + self.func = optimize.rosen + self.jac = optimize.rosen_der + self.hess = optimize.rosen_hess + self.hessp = optimize.rosen_hess_prod + self.bounds = [(0., 10.), (0., 10.)] + + def test_attributes_present(self): + attributes = ['nit', 'nfev', 'x', 'success', 'status', 'fun', + 'message'] + skip = {'cobyla': ['nit']} + for method in MINIMIZE_METHODS: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + ("Method .+ does not use (gradient|Hessian.*)" + " information")) + res = optimize.minimize(self.func, self.x0, method=method, + jac=self.jac, hess=self.hess, + hessp=self.hessp) + for attribute in attributes: + if method in skip and attribute in skip[method]: + continue + + assert hasattr(res, attribute) + assert attribute in dir(res) + + # gh13001, OptimizeResult.message should be a str + assert isinstance(res.message, str) + + +def f1(z, *params): + x, y = z + a, b, c, d, e, f, g, h, i, j, k, l, scale = params + return (a * x**2 + b * x * y + c * y**2 + d*x + e*y + f) + + +def f2(z, *params): + x, y = z + a, b, c, d, e, f, g, h, i, j, k, l, scale = params + return (-g*np.exp(-((x-h)**2 + (y-i)**2) / scale)) + + +def f3(z, *params): + x, y = z + a, b, c, d, e, f, g, h, i, j, k, l, scale = params + return (-j*np.exp(-((x-k)**2 + (y-l)**2) / scale)) + + +def brute_func(z, *params): + return f1(z, *params) + f2(z, *params) + f3(z, *params) + + +class TestBrute: + # Test the "brute force" method + def setup_method(self): + self.params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 
26, 1, -2, 0.5)
+        self.rranges = (slice(-4, 4, 0.25), slice(-4, 4, 0.25))
+        self.solution = np.array([-1.05665192, 1.80834843])
+
+    def brute_func(self, z, *params):
+        # an instance method for `brute` to optimize
+        return brute_func(z, *params)
+
+    def test_brute(self):
+        # test fmin
+        resbrute = optimize.brute(brute_func, self.rranges, args=self.params,
+                                  full_output=True, finish=optimize.fmin)
+        assert_allclose(resbrute[0], self.solution, atol=1e-3)
+        assert_allclose(resbrute[1], brute_func(self.solution, *self.params),
+                        atol=1e-3)
+
+        # test minimize
+        resbrute = optimize.brute(brute_func, self.rranges, args=self.params,
+                                  full_output=True,
+                                  finish=optimize.minimize)
+        assert_allclose(resbrute[0], self.solution, atol=1e-3)
+        assert_allclose(resbrute[1], brute_func(self.solution, *self.params),
+                        atol=1e-3)
+
+        # test that brute can optimize an instance method (the other tests use
+        # a non-class based function)
+        resbrute = optimize.brute(self.brute_func, self.rranges,
+                                  args=self.params, full_output=True,
+                                  finish=optimize.minimize)
+        assert_allclose(resbrute[0], self.solution, atol=1e-3)
+
+    def test_1D(self):
+        # test that for a 1-D problem the test function is passed an array,
+        # not a scalar.
+        def f(x):
+            assert len(x.shape) == 1
+            assert x.shape[0] == 1
+            return x ** 2
+
+        optimize.brute(f, [(-1, 1)], Ns=3, finish=None)
+
+    @pytest.mark.fail_slow(5)
+    def test_workers(self):
+        # check that parallel evaluation works
+        resbrute = optimize.brute(brute_func, self.rranges, args=self.params,
+                                  full_output=True, finish=None)
+
+        resbrute1 = optimize.brute(brute_func, self.rranges, args=self.params,
+                                   full_output=True, finish=None, workers=2)
+
+        assert_allclose(resbrute1[-1], resbrute[-1])
+        assert_allclose(resbrute1[0], resbrute[0])
+
+    def test_runtime_warning(self, capsys):
+        rng = np.random.default_rng(1234)
+
+        def func(z, *params):
+            return rng.random(1) * 1000  # a problem that never converges
+
+        msg = "final optimization did not succeed.*|Maximum number of function eval.*"
+        with pytest.warns(RuntimeWarning, match=msg):
+            optimize.brute(func, self.rranges, args=self.params, disp=True)
+
+    def test_coerce_args_param(self):
+        # optimize.brute should coerce non-iterable args to a tuple.
+        def f(x, *args):
+            return x ** args[0]
+
+        resbrute = optimize.brute(f, (slice(-4, 4, .25),), args=2)
+        assert_allclose(resbrute, 0)
+
+
+@pytest.mark.fail_slow(10)
+def test_cobyla_threadsafe():
+
+    # Verify that cobyla is threadsafe. Will segfault if it is not.
+
+    import concurrent.futures
+    import time
+
+    def objective1(x):
+        time.sleep(0.1)
+        return x[0]**2
+
+    def objective2(x):
+        time.sleep(0.1)
+        return (x[0]-1)**2
+
+    min_method = "COBYLA"
+
+    def minimizer1():
+        return optimize.minimize(objective1,
+                                 [0.0],
+                                 method=min_method)
+
+    def minimizer2():
+        return optimize.minimize(objective2,
+                                 [0.0],
+                                 method=min_method)
+
+    with concurrent.futures.ThreadPoolExecutor() as pool:
+        tasks = []
+        tasks.append(pool.submit(minimizer1))
+        tasks.append(pool.submit(minimizer2))
+        for t in tasks:
+            t.result()
+
+
+class TestIterationLimits:
+    # Tests that optimization does not give up before trying the requested
+    # number of iterations or evaluations, and that it does not succeed
+    # by exceeding the limits.
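+    # check_limits below drives `minimize` from several starting points and
+    # cross-checks the reported nfev/nit against the maxfev/maxiter options,
+    # counting the true number of evaluations via `self.funcalls`.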
+ def setup_method(self): + self.funcalls = 0 + + def slow_func(self, v): + self.funcalls += 1 + r, t = np.sqrt(v[0]**2+v[1]**2), np.arctan2(v[0], v[1]) + return np.sin(r*20 + t)+r*0.5 + + @pytest.mark.fail_slow(5) + def test_neldermead_limit(self): + self.check_limits("Nelder-Mead", 200) + + def test_powell_limit(self): + self.check_limits("powell", 1000) + + def check_limits(self, method, default_iters): + for start_v in [[0.1, 0.1], [1, 1], [2, 2]]: + for mfev in [50, 500, 5000]: + self.funcalls = 0 + res = optimize.minimize(self.slow_func, start_v, + method=method, + options={"maxfev": mfev}) + assert self.funcalls == res["nfev"] + if res["success"]: + assert res["nfev"] < mfev + else: + assert res["nfev"] >= mfev + for mit in [50, 500, 5000]: + res = optimize.minimize(self.slow_func, start_v, + method=method, + options={"maxiter": mit}) + if res["success"]: + assert res["nit"] <= mit + else: + assert res["nit"] >= mit + for mfev, mit in [[50, 50], [5000, 5000], [5000, np.inf]]: + self.funcalls = 0 + res = optimize.minimize(self.slow_func, start_v, + method=method, + options={"maxiter": mit, + "maxfev": mfev}) + assert self.funcalls == res["nfev"] + if res["success"]: + assert res["nfev"] < mfev and res["nit"] <= mit + else: + assert res["nfev"] >= mfev or res["nit"] >= mit + for mfev, mit in [[np.inf, None], [None, np.inf]]: + self.funcalls = 0 + res = optimize.minimize(self.slow_func, start_v, + method=method, + options={"maxiter": mit, + "maxfev": mfev}) + assert self.funcalls == res["nfev"] + if res["success"]: + if mfev is None: + assert res["nfev"] < default_iters*2 + else: + assert res["nit"] <= default_iters*2 + else: + assert (res["nfev"] >= default_iters*2 + or res["nit"] >= default_iters*2) + + +def test_result_x_shape_when_len_x_is_one(): + def fun(x): + return x * x + + def jac(x): + return 2. 
* x
+
+    def hess(x):
+        return np.array([[2.]])
+
+    methods = ['Nelder-Mead', 'Powell', 'CG', 'BFGS', 'L-BFGS-B', 'TNC',
+               'COBYLA', 'COBYQA', 'SLSQP']
+    for method in methods:
+        res = optimize.minimize(fun, np.array([0.1]), method=method)
+        assert res.x.shape == (1,)
+
+    # use jac + hess
+    methods = ['trust-constr', 'dogleg', 'trust-ncg', 'trust-exact',
+               'trust-krylov', 'Newton-CG']
+    for method in methods:
+        res = optimize.minimize(fun, np.array([0.1]), method=method, jac=jac,
+                                hess=hess)
+        assert res.x.shape == (1,)
+
+
+class FunctionWithGradient:
+    def __init__(self):
+        self.number_of_calls = 0
+
+    def __call__(self, x):
+        self.number_of_calls += 1
+        return np.sum(x**2), 2 * x
+
+
+@pytest.fixture
+def function_with_gradient():
+    return FunctionWithGradient()
+
+
+def test_memoize_jac_function_before_gradient(function_with_gradient):
+    memoized_function = MemoizeJac(function_with_gradient)
+
+    x0 = np.array([1.0, 2.0])
+    assert_allclose(memoized_function(x0), 5.0)
+    assert function_with_gradient.number_of_calls == 1
+
+    assert_allclose(memoized_function.derivative(x0), 2 * x0)
+    assert function_with_gradient.number_of_calls == 1, \
+        "function is not recomputed " \
+        "if gradient is requested after function value"
+
+    assert_allclose(
+        memoized_function(2 * x0), 20.0,
+        err_msg="different input triggers new computation")
+    assert function_with_gradient.number_of_calls == 2, \
+        "different input triggers new computation"
+
+
+def test_memoize_jac_gradient_before_function(function_with_gradient):
+    memoized_function = MemoizeJac(function_with_gradient)
+
+    x0 = np.array([1.0, 2.0])
+    assert_allclose(memoized_function.derivative(x0), 2 * x0)
+    assert function_with_gradient.number_of_calls == 1
+
+    assert_allclose(memoized_function(x0), 5.0)
+    assert function_with_gradient.number_of_calls == 1, \
+        "function is not recomputed " \
+        "if function value is requested after gradient"
+
+    assert_allclose(
+        memoized_function.derivative(2 * x0), 4 * x0,
+        err_msg="different input triggers new computation")
+    assert function_with_gradient.number_of_calls == 2, \
+        "different input triggers new computation"
+
+
+def test_memoize_jac_with_bfgs(function_with_gradient):
+    """ Tests that using MemoizeJac in combination with ScalarFunction
+        and BFGS does not lead to repeated function evaluations.
+        Tests changes made in response to GH11868.
+ """ + memoized_function = MemoizeJac(function_with_gradient) + jac = memoized_function.derivative + hess = optimize.BFGS() + + x0 = np.array([1.0, 0.5]) + scalar_function = ScalarFunction( + memoized_function, x0, (), jac, hess, None, None) + assert function_with_gradient.number_of_calls == 1 + + scalar_function.fun(x0 + 0.1) + assert function_with_gradient.number_of_calls == 2 + + scalar_function.fun(x0 + 0.2) + assert function_with_gradient.number_of_calls == 3 + + +def test_gh12696(): + # Test that optimize doesn't throw warning gh-12696 + with assert_no_warnings(): + optimize.fminbound( + lambda x: np.array([x**2]), -np.pi, np.pi, disp=False) + + +# --- Test minimize with equal upper and lower bounds --- # + +def setup_test_equal_bounds(): + + np.random.seed(0) + x0 = np.random.rand(4) + lb = np.array([0, 2, -1, -1.0]) + ub = np.array([3, 2, 2, -1.0]) + i_eb = (lb == ub) + + def check_x(x, check_size=True, check_values=True): + if check_size: + assert x.size == 4 + if check_values: + assert_allclose(x[i_eb], lb[i_eb]) + + def func(x): + check_x(x) + return optimize.rosen(x) + + def grad(x): + check_x(x) + return optimize.rosen_der(x) + + def callback(x, *args): + check_x(x) + + def constraint1(x): + check_x(x, check_values=False) + return x[0:1] - 1 + + def jacobian1(x): + check_x(x, check_values=False) + dc = np.zeros_like(x) + dc[0] = 1 + return dc + + def constraint2(x): + check_x(x, check_values=False) + return x[2:3] - 0.5 + + def jacobian2(x): + check_x(x, check_values=False) + dc = np.zeros_like(x) + dc[2] = 1 + return dc + + c1a = NonlinearConstraint(constraint1, -np.inf, 0) + c1b = NonlinearConstraint(constraint1, -np.inf, 0, jacobian1) + c2a = NonlinearConstraint(constraint2, -np.inf, 0) + c2b = NonlinearConstraint(constraint2, -np.inf, 0, jacobian2) + + # test using the three methods that accept bounds, use derivatives, and + # have some trouble when bounds fix variables + methods = ('L-BFGS-B', 'SLSQP', 'TNC') + + # test w/out gradient, w/ gradient, and w/ combined objective/gradient + kwds = ({"fun": func, "jac": False}, + {"fun": func, "jac": grad}, + {"fun": (lambda x: (func(x), grad(x))), + "jac": True}) + + # test with both old- and new-style bounds + bound_types = (lambda lb, ub: list(zip(lb, ub)), + Bounds) + + # Test for many combinations of constraints w/ and w/out jacobian + # Pairs in format: (test constraints, reference constraints) + # (always use analytical jacobian in reference) + constraints = ((None, None), ([], []), + (c1a, c1b), (c2b, c2b), + ([c1b], [c1b]), ([c2a], [c2b]), + ([c1a, c2a], [c1b, c2b]), + ([c1a, c2b], [c1b, c2b]), + ([c1b, c2b], [c1b, c2b])) + + # test with and without callback function + callbacks = (None, callback) + + data = {"methods": methods, "kwds": kwds, "bound_types": bound_types, + "constraints": constraints, "callbacks": callbacks, + "lb": lb, "ub": ub, "x0": x0, "i_eb": i_eb} + + return data + + +eb_data = setup_test_equal_bounds() + + +# This test is about handling fixed variables, not the accuracy of the solvers +@pytest.mark.xfail_on_32bit("Failures due to floating point issues, not logic") +@pytest.mark.parametrize('method', eb_data["methods"]) +@pytest.mark.parametrize('kwds', eb_data["kwds"]) +@pytest.mark.parametrize('bound_type', eb_data["bound_types"]) +@pytest.mark.parametrize('constraints', eb_data["constraints"]) +@pytest.mark.parametrize('callback', eb_data["callbacks"]) +def test_equal_bounds(method, kwds, bound_type, constraints, callback): + """ + Tests that minimizers still work if (bounds.lb == 
bounds.ub).any() + gh12502 - Divide by zero in Jacobian numerical differentiation when + equality bounds constraints are used + """ + # GH-15051; slightly more skips than necessary; hopefully fixed by GH-14882 + if (platform.machine() == 'aarch64' and method == "TNC" + and kwds["jac"] is False and callback is not None): + pytest.skip('Tolerance violation on aarch') + + lb, ub = eb_data["lb"], eb_data["ub"] + x0, i_eb = eb_data["x0"], eb_data["i_eb"] + + test_constraints, reference_constraints = constraints + if test_constraints and not method == 'SLSQP': + pytest.skip('Only SLSQP supports nonlinear constraints') + # reference constraints always have analytical jacobian + # if test constraints are not the same, we'll need finite differences + fd_needed = (test_constraints != reference_constraints) + + bounds = bound_type(lb, ub) # old- or new-style + + kwds.update({"x0": x0, "method": method, "bounds": bounds, + "constraints": test_constraints, "callback": callback}) + res = optimize.minimize(**kwds) + + expected = optimize.minimize(optimize.rosen, x0, method=method, + jac=optimize.rosen_der, bounds=bounds, + constraints=reference_constraints) + + # compare the output of a solution with FD vs that of an analytic grad + assert res.success + assert_allclose(res.fun, expected.fun, rtol=1.5e-6) + assert_allclose(res.x, expected.x, rtol=5e-4) + + if fd_needed or kwds['jac'] is False: + expected.jac[i_eb] = np.nan + assert res.jac.shape[0] == 4 + assert_allclose(res.jac[i_eb], expected.jac[i_eb], rtol=1e-6) + + if not (kwds['jac'] or test_constraints or isinstance(bounds, Bounds)): + # compare the output to an equivalent FD minimization that doesn't + # need factorization + def fun(x): + new_x = np.array([np.nan, 2, np.nan, -1]) + new_x[[0, 2]] = x + return optimize.rosen(new_x) + + fd_res = optimize.minimize(fun, + x0[[0, 2]], + method=method, + bounds=bounds[::2]) + assert_allclose(res.fun, fd_res.fun) + # TODO this test should really be equivalent to factorized version + # above, down to res.nfev. However, testing found that when TNC is + # called with or without a callback the output is different. The two + # should be the same! This indicates that the TNC callback may be + # mutating something when it shouldn't. + assert_allclose(res.x[[0, 2]], fd_res.x, rtol=2e-6) + + +@pytest.mark.parametrize('method', eb_data["methods"]) +def test_all_bounds_equal(method): + # this only tests methods that have parameters factored out when lb==ub + # it does not test other methods that work with bounds + def f(x, p1=1): + return np.linalg.norm(x) + p1 + + bounds = [(1, 1), (2, 2)] + x0 = (1.0, 3.0) + res = optimize.minimize(f, x0, bounds=bounds, method=method) + assert res.success + assert_allclose(res.fun, f([1.0, 2.0])) + assert res.nfev == 1 + assert res.message == 'All independent variables were fixed by bounds.' 
+ + args = (2,) + res = optimize.minimize(f, x0, bounds=bounds, method=method, args=args) + assert res.success + assert_allclose(res.fun, f([1.0, 2.0], 2)) + + if method.upper() == 'SLSQP': + def con(x): + return np.sum(x) + nlc = NonlinearConstraint(con, -np.inf, 0.0) + res = optimize.minimize( + f, x0, bounds=bounds, method=method, constraints=[nlc] + ) + assert res.success is False + assert_allclose(res.fun, f([1.0, 2.0])) + assert res.nfev == 1 + message = "All independent variables were fixed by bounds, but" + assert res.message.startswith(message) + + nlc = NonlinearConstraint(con, -np.inf, 4) + res = optimize.minimize( + f, x0, bounds=bounds, method=method, constraints=[nlc] + ) + assert res.success is True + assert_allclose(res.fun, f([1.0, 2.0])) + assert res.nfev == 1 + message = "All independent variables were fixed by bounds at values" + assert res.message.startswith(message) + + +def test_eb_constraints(): + # make sure constraint functions aren't overwritten when equal bounds + # are employed, and a parameter is factored out. GH14859 + def f(x): + return x[0]**3 + x[1]**2 + x[2]*x[3] + + def cfun(x): + return x[0] + x[1] + x[2] + x[3] - 40 + + constraints = [{'type': 'ineq', 'fun': cfun}] + + bounds = [(0, 20)] * 4 + bounds[1] = (5, 5) + optimize.minimize( + f, + x0=[1, 2, 3, 4], + method='SLSQP', + bounds=bounds, + constraints=constraints, + ) + assert constraints[0]['fun'] == cfun + + +def test_show_options(): + solver_methods = { + 'minimize': MINIMIZE_METHODS, + 'minimize_scalar': MINIMIZE_SCALAR_METHODS, + 'root': ROOT_METHODS, + 'root_scalar': ROOT_SCALAR_METHODS, + 'linprog': LINPROG_METHODS, + 'quadratic_assignment': QUADRATIC_ASSIGNMENT_METHODS, + } + for solver, methods in solver_methods.items(): + for method in methods: + # testing that `show_options` works without error + show_options(solver, method) + + unknown_solver_method = { + 'minimize': "ekki", # unknown method + 'maximize': "cg", # unknown solver + 'maximize_scalar': "ekki", # unknown solver and method + } + for solver, method in unknown_solver_method.items(): + # testing that `show_options` raises ValueError + assert_raises(ValueError, show_options, solver, method) + + +def test_bounds_with_list(): + # gh13501. Bounds created with lists weren't working for Powell. + bounds = optimize.Bounds(lb=[5., 5.], ub=[10., 10.]) + optimize.minimize( + optimize.rosen, x0=np.array([9, 9]), method='Powell', bounds=bounds + ) + + +def test_x_overwritten_user_function(): + # if the user overwrites the x-array in the user function it's likely + # that the minimizer stops working properly. 
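+    # The objectives below mutate `x` in place (e.g. `x -= a`, then
+    # `x *= x`) to provoke exactly that failure mode.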
+ # gh13740 + def fquad(x): + a = np.arange(np.size(x)) + x -= a + x *= x + return np.sum(x) + + def fquad_jac(x): + a = np.arange(np.size(x)) + x *= 2 + x -= 2 * a + return x + + def fquad_hess(x): + return np.eye(np.size(x)) * 2.0 + + meth_jac = [ + 'newton-cg', 'dogleg', 'trust-ncg', 'trust-exact', + 'trust-krylov', 'trust-constr' + ] + meth_hess = [ + 'dogleg', 'trust-ncg', 'trust-exact', 'trust-krylov', 'trust-constr' + ] + + x0 = np.ones(5) * 1.5 + + for meth in MINIMIZE_METHODS: + jac = None + hess = None + if meth in meth_jac: + jac = fquad_jac + if meth in meth_hess: + hess = fquad_hess + res = optimize.minimize(fquad, x0, method=meth, jac=jac, hess=hess) + assert_allclose(res.x, np.arange(np.size(x0)), atol=2e-4) + + +class TestGlobalOptimization: + + def test_optimize_result_attributes(self): + def func(x): + return x ** 2 + + # Note that `brute` solver does not return `OptimizeResult` + results = [optimize.basinhopping(func, x0=1), + optimize.differential_evolution(func, [(-4, 4)]), + optimize.shgo(func, [(-4, 4)]), + optimize.dual_annealing(func, [(-4, 4)]), + optimize.direct(func, [(-4, 4)]), + ] + + for result in results: + assert isinstance(result, optimize.OptimizeResult) + assert hasattr(result, "x") + assert hasattr(result, "success") + assert hasattr(result, "message") + assert hasattr(result, "fun") + assert hasattr(result, "nfev") + assert hasattr(result, "nit") + + +def test_approx_fprime(): + # check that approx_fprime (serviced by approx_derivative) works for + # jac and hess + g = optimize.approx_fprime(himmelblau_x0, himmelblau) + assert_allclose(g, himmelblau_grad(himmelblau_x0), rtol=5e-6) + + h = optimize.approx_fprime(himmelblau_x0, himmelblau_grad) + assert_allclose(h, himmelblau_hess(himmelblau_x0), rtol=5e-6) + + +def test_gh12594(): + # gh-12594 reported an error in `_linesearch_powell` and + # `_line_for_search` when `Bounds` was passed lists instead of arrays. + # Check that results are the same whether the inputs are lists or arrays. + + def f(x): + return x[0]**2 + (x[1] - 1)**2 + + bounds = Bounds(lb=[-10, -10], ub=[10, 10]) + res = optimize.minimize(f, x0=(0, 0), method='Powell', bounds=bounds) + bounds = Bounds(lb=np.array([-10, -10]), ub=np.array([10, 10])) + ref = optimize.minimize(f, x0=(0, 0), method='Powell', bounds=bounds) + + assert_allclose(res.fun, ref.fun) + assert_allclose(res.x, ref.x) + + +@pytest.mark.parametrize('method', ['Newton-CG', 'trust-constr']) +@pytest.mark.parametrize('sparse_type', [coo_matrix, csc_matrix, csr_matrix, + coo_array, csr_array, csc_array]) +def test_sparse_hessian(method, sparse_type): + # gh-8792 reported an error for minimization with `newton_cg` when `hess` + # returns a sparse matrix. Check that results are the same whether `hess` + # returns a dense or sparse matrix for optimization methods that accept + # sparse Hessian matrices. + + def sparse_rosen_hess(x): + return sparse_type(rosen_hess(x)) + + x0 = [2., 2.] 
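+    # The evaluation counts compared below should match exactly if the
+    # sparse Hessian is consumed without changing the optimization path.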
+ + res_sparse = optimize.minimize(rosen, x0, method=method, + jac=rosen_der, hess=sparse_rosen_hess) + res_dense = optimize.minimize(rosen, x0, method=method, + jac=rosen_der, hess=rosen_hess) + + assert_allclose(res_dense.fun, res_sparse.fun) + assert_allclose(res_dense.x, res_sparse.x) + assert res_dense.nfev == res_sparse.nfev + assert res_dense.njev == res_sparse.njev + assert res_dense.nhev == res_sparse.nhev diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_quadratic_assignment.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_quadratic_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..6f476be1604a21573b15b5dedfa3ceb47974e2d8 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_quadratic_assignment.py @@ -0,0 +1,431 @@ +import pytest +import numpy as np +from scipy.optimize import quadratic_assignment, OptimizeWarning +from scipy.optimize._qap import _calc_score as _score +from numpy.testing import assert_equal, assert_, assert_warns + + +################ +# Common Tests # +################ + +def chr12c(): + A = [ + [0, 90, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [90, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0], + [10, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0], + [0, 23, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0], + [0, 0, 43, 0, 0, 0, 26, 0, 0, 0, 0, 0], + [0, 0, 0, 88, 0, 0, 0, 16, 0, 0, 0, 0], + [0, 0, 0, 0, 26, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 16, 0, 0, 0, 96, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 29, 0], + [0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 37], + [0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 0, 0], + ] + B = [ + [0, 36, 54, 26, 59, 72, 9, 34, 79, 17, 46, 95], + [36, 0, 73, 35, 90, 58, 30, 78, 35, 44, 79, 36], + [54, 73, 0, 21, 10, 97, 58, 66, 69, 61, 54, 63], + [26, 35, 21, 0, 93, 12, 46, 40, 37, 48, 68, 85], + [59, 90, 10, 93, 0, 64, 5, 29, 76, 16, 5, 76], + [72, 58, 97, 12, 64, 0, 96, 55, 38, 54, 0, 34], + [9, 30, 58, 46, 5, 96, 0, 83, 35, 11, 56, 37], + [34, 78, 66, 40, 29, 55, 83, 0, 44, 12, 15, 80], + [79, 35, 69, 37, 76, 38, 35, 44, 0, 64, 39, 33], + [17, 44, 61, 48, 16, 54, 11, 12, 64, 0, 70, 86], + [46, 79, 54, 68, 5, 0, 56, 15, 39, 70, 0, 18], + [95, 36, 63, 85, 76, 34, 37, 80, 33, 86, 18, 0], + ] + A, B = np.array(A), np.array(B) + n = A.shape[0] + + opt_perm = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - [1] * n + + return A, B, opt_perm + + +class QAPCommonTests: + """ + Base class for `quadratic_assignment` tests. 
+ """ + def setup_method(self): + np.random.seed(0) + + # Test global optima of problem from Umeyama IVB + # https://pcl.sitehost.iu.edu/rgoldsto/papers/weighted%20graph%20match2.pdf + # Graph matching maximum is in the paper + # QAP minimum determined by brute force + def test_accuracy_1(self): + # besides testing accuracy, check that A and B can be lists + A = [[0, 3, 4, 2], + [0, 0, 1, 2], + [1, 0, 0, 1], + [0, 0, 1, 0]] + + B = [[0, 4, 2, 4], + [0, 0, 1, 0], + [0, 2, 0, 2], + [0, 1, 2, 0]] + + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, "maximize": False}) + assert_equal(res.fun, 10) + assert_equal(res.col_ind, np.array([1, 2, 3, 0])) + + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, "maximize": True}) + + if self.method == 'faq': + # Global optimum is 40, but FAQ gets 37 + assert_equal(res.fun, 37) + assert_equal(res.col_ind, np.array([0, 2, 3, 1])) + else: + assert_equal(res.fun, 40) + assert_equal(res.col_ind, np.array([0, 3, 1, 2])) + + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, "maximize": True}) + + # Test global optima of problem from Umeyama IIIB + # https://pcl.sitehost.iu.edu/rgoldsto/papers/weighted%20graph%20match2.pdf + # Graph matching maximum is in the paper + # QAP minimum determined by brute force + def test_accuracy_2(self): + + A = np.array([[0, 5, 8, 6], + [5, 0, 5, 1], + [8, 5, 0, 2], + [6, 1, 2, 0]]) + + B = np.array([[0, 1, 8, 4], + [1, 0, 5, 2], + [8, 5, 0, 5], + [4, 2, 5, 0]]) + + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, "maximize": False}) + if self.method == 'faq': + # Global optimum is 176, but FAQ gets 178 + assert_equal(res.fun, 178) + assert_equal(res.col_ind, np.array([1, 0, 3, 2])) + else: + assert_equal(res.fun, 176) + assert_equal(res.col_ind, np.array([1, 2, 3, 0])) + + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, "maximize": True}) + assert_equal(res.fun, 286) + assert_equal(res.col_ind, np.array([2, 3, 0, 1])) + + def test_accuracy_3(self): + + A, B, opt_perm = chr12c() + + # basic minimization + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0}) + assert_(11156 <= res.fun < 21000) + assert_equal(res.fun, _score(A, B, res.col_ind)) + + # basic maximization + res = quadratic_assignment(A, B, method=self.method, + options={"rng": 0, 'maximize': True}) + assert_(74000 <= res.fun < 85000) + assert_equal(res.fun, _score(A, B, res.col_ind)) + + # check ofv with strictly partial match + seed_cost = np.array([4, 8, 10]) + seed = np.asarray([seed_cost, opt_perm[seed_cost]]).T + res = quadratic_assignment(A, B, method=self.method, + options={'partial_match': seed}) + assert_(11156 <= res.fun < 21000) + assert_equal(res.col_ind[seed_cost], opt_perm[seed_cost]) + + # check performance when partial match is the global optimum + seed = np.asarray([np.arange(len(A)), opt_perm]).T + res = quadratic_assignment(A, B, method=self.method, + options={'partial_match': seed}) + assert_equal(res.col_ind, seed[:, 1].T) + assert_equal(res.fun, 11156) + assert_equal(res.nit, 0) + + # check performance with zero sized matrix inputs + empty = np.empty((0, 0)) + res = quadratic_assignment(empty, empty, method=self.method, + options={"rng": 0}) + assert_equal(res.nit, 0) + assert_equal(res.fun, 0) + + def test_unknown_options(self): + A, B, opt_perm = chr12c() + + def f(): + quadratic_assignment(A, B, method=self.method, + options={"ekki-ekki": True}) + assert_warns(OptimizeWarning, f) + + +class 
TestFAQ(QAPCommonTests): + method = "faq" + + def test_options(self): + # cost and distance matrices of QAPLIB instance chr12c + A, B, opt_perm = chr12c() + n = len(A) + + # check that max_iter is obeying with low input value + res = quadratic_assignment(A, B, + options={'maxiter': 5}) + assert_equal(res.nit, 5) + + # test with shuffle + res = quadratic_assignment(A, B, + options={'shuffle_input': True}) + assert_(11156 <= res.fun < 21000) + + # test with randomized init + res = quadratic_assignment(A, B, + options={'rng': 1, 'P0': "randomized"}) + assert_(11156 <= res.fun < 21000) + + # check with specified P0 + K = np.ones((n, n)) / float(n) + K = _doubly_stochastic(K) + res = quadratic_assignment(A, B, + options={'P0': K}) + assert_(11156 <= res.fun < 21000) + + def test_specific_input_validation(self): + + A = np.identity(2) + B = A + + # method is implicitly faq + + # ValueError Checks: making sure single value parameters are of + # correct value + with pytest.raises(ValueError, match="Invalid 'P0' parameter"): + quadratic_assignment(A, B, options={'P0': "random"}) + with pytest.raises( + ValueError, match="'maxiter' must be a positive integer"): + quadratic_assignment(A, B, options={'maxiter': -1}) + with pytest.raises(ValueError, match="'tol' must be a positive float"): + quadratic_assignment(A, B, options={'tol': -1}) + + # TypeError Checks: making sure single value parameters are of + # correct type + with pytest.raises(TypeError): + quadratic_assignment(A, B, options={'maxiter': 1.5}) + + # test P0 matrix input + with pytest.raises( + ValueError, + match="`P0` matrix must have shape m' x m', where m'=n-m"): + quadratic_assignment( + np.identity(4), np.identity(4), + options={'P0': np.ones((3, 3))} + ) + + K = [[0.4, 0.2, 0.3], + [0.3, 0.6, 0.2], + [0.2, 0.2, 0.7]] + # matrix that isn't quite doubly stochastic + with pytest.raises( + ValueError, match="`P0` matrix must be doubly stochastic"): + quadratic_assignment( + np.identity(3), np.identity(3), options={'P0': K} + ) + + +class Test2opt(QAPCommonTests): + method = "2opt" + + def test_deterministic(self): + # np.random.seed(0) executes before every method + n = 20 + + A = np.random.rand(n, n) + B = np.random.rand(n, n) + res1 = quadratic_assignment(A, B, method=self.method) + + np.random.seed(0) + + A = np.random.rand(n, n) + B = np.random.rand(n, n) + res2 = quadratic_assignment(A, B, method=self.method) + + assert_equal(res1.nit, res2.nit) + + def test_partial_guess(self): + n = 5 + A = np.random.rand(n, n) + B = np.random.rand(n, n) + + res1 = quadratic_assignment(A, B, method=self.method, + options={'rng': 0}) + guess = np.array([np.arange(5), res1.col_ind]).T + res2 = quadratic_assignment(A, B, method=self.method, + options={'rng': 0, 'partial_guess': guess}) + fix = [2, 4] + match = np.array([np.arange(5)[fix], res1.col_ind[fix]]).T + res3 = quadratic_assignment(A, B, method=self.method, + options={'rng': 0, 'partial_guess': guess, + 'partial_match': match}) + assert_(res1.nit != n*(n+1)/2) + assert_equal(res2.nit, n*(n+1)/2) # tests each swap exactly once + assert_equal(res3.nit, (n-2)*(n-1)/2) # tests free swaps exactly once + + def test_specific_input_validation(self): + # can't have more seed nodes than cost/dist nodes + _rm = _range_matrix + with pytest.raises( + ValueError, + match="`partial_guess` can have only as many entries as"): + quadratic_assignment(np.identity(3), np.identity(3), + method=self.method, + options={'partial_guess': _rm(5, 2)}) + # test for only two seed columns + with pytest.raises( + 
ValueError, match="`partial_guess` must have two columns"): + quadratic_assignment( + np.identity(3), np.identity(3), method=self.method, + options={'partial_guess': _range_matrix(2, 3)} + ) + # test that seed has no more than two dimensions + with pytest.raises( + ValueError, match="`partial_guess` must have exactly two"): + quadratic_assignment( + np.identity(3), np.identity(3), method=self.method, + options={'partial_guess': np.random.rand(3, 2, 2)} + ) + # seeds cannot be negative valued + with pytest.raises( + ValueError, match="`partial_guess` must contain only pos"): + quadratic_assignment( + np.identity(3), np.identity(3), method=self.method, + options={'partial_guess': -1 * _range_matrix(2, 2)} + ) + # seeds can't have values greater than number of nodes + with pytest.raises( + ValueError, + match="`partial_guess` entries must be less than number"): + quadratic_assignment( + np.identity(5), np.identity(5), method=self.method, + options={'partial_guess': 2 * _range_matrix(4, 2)} + ) + # columns of seed matrix must be unique + with pytest.raises( + ValueError, + match="`partial_guess` column entries must be unique"): + quadratic_assignment( + np.identity(3), np.identity(3), method=self.method, + options={'partial_guess': np.ones((2, 2))} + ) + + +class TestQAPOnce: + def setup_method(self): + np.random.seed(0) + + # these don't need to be repeated for each method + def test_common_input_validation(self): + # test that non square matrices return error + with pytest.raises(ValueError, match="`A` must be square"): + quadratic_assignment( + np.random.random((3, 4)), + np.random.random((3, 3)), + ) + with pytest.raises(ValueError, match="`B` must be square"): + quadratic_assignment( + np.random.random((3, 3)), + np.random.random((3, 4)), + ) + # test that cost and dist matrices have no more than two dimensions + with pytest.raises( + ValueError, match="`A` and `B` must have exactly two"): + quadratic_assignment( + np.random.random((3, 3, 3)), + np.random.random((3, 3, 3)), + ) + # test that cost and dist matrices of different sizes return error + with pytest.raises( + ValueError, + match="`A` and `B` matrices must be of equal size"): + quadratic_assignment( + np.random.random((3, 3)), + np.random.random((4, 4)), + ) + # can't have more seed nodes than cost/dist nodes + _rm = _range_matrix + with pytest.raises( + ValueError, + match="`partial_match` can have only as many seeds as"): + quadratic_assignment(np.identity(3), np.identity(3), + options={'partial_match': _rm(5, 2)}) + # test for only two seed columns + with pytest.raises( + ValueError, match="`partial_match` must have two columns"): + quadratic_assignment( + np.identity(3), np.identity(3), + options={'partial_match': _range_matrix(2, 3)} + ) + # test that seed has no more than two dimensions + with pytest.raises( + ValueError, match="`partial_match` must have exactly two"): + quadratic_assignment( + np.identity(3), np.identity(3), + options={'partial_match': np.random.rand(3, 2, 2)} + ) + # seeds cannot be negative valued + with pytest.raises( + ValueError, match="`partial_match` must contain only pos"): + quadratic_assignment( + np.identity(3), np.identity(3), + options={'partial_match': -1 * _range_matrix(2, 2)} + ) + # seeds can't have values greater than number of nodes + with pytest.raises( + ValueError, + match="`partial_match` entries must be less than number"): + quadratic_assignment( + np.identity(5), np.identity(5), + options={'partial_match': 2 * _range_matrix(4, 2)} + ) + # columns of seed matrix must be unique + 
with pytest.raises( + ValueError, + match="`partial_match` column entries must be unique"): + quadratic_assignment( + np.identity(3), np.identity(3), + options={'partial_match': np.ones((2, 2))} + ) + + +def _range_matrix(a, b): + mat = np.zeros((a, b)) + for i in range(b): + mat[:, i] = np.arange(a) + return mat + + +def _doubly_stochastic(P, tol=1e-3): + # cleaner implementation of btaba/sinkhorn_knopp + + max_iter = 1000 + c = 1 / P.sum(axis=0) + r = 1 / (P @ c) + P_eps = P + + for it in range(max_iter): + if ((np.abs(P_eps.sum(axis=1) - 1) < tol).all() and + (np.abs(P_eps.sum(axis=0) - 1) < tol).all()): + # All column/row sums ~= 1 within threshold + break + + c = 1 / (r @ P) + r = 1 / (P @ c) + P_eps = r[:, None] * P * c + + return P_eps diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_regression.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..44916ba96293db19756b8222422e76945aa48ebb --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_regression.py @@ -0,0 +1,40 @@ +"""Regression tests for optimize. + +""" +import numpy as np +from numpy.testing import assert_almost_equal +from pytest import raises as assert_raises + +import scipy.optimize + + +class TestRegression: + + def test_newton_x0_is_0(self): + # Regression test for gh-1601 + tgt = 1 + res = scipy.optimize.newton(lambda x: x - 1, 0) + assert_almost_equal(res, tgt) + + def test_newton_integers(self): + # Regression test for gh-1741 + root = scipy.optimize.newton(lambda x: x**2 - 1, x0=2, + fprime=lambda x: 2*x) + assert_almost_equal(root, 1.0) + + def test_lmdif_errmsg(self): + # This shouldn't cause a crash on Python 3 + class SomeError(Exception): + pass + counter = [0] + + def func(x): + counter[0] += 1 + if counter[0] < 3: + return x**2 - np.array([9, 10, 11]) + else: + raise SomeError() + assert_raises(SomeError, + scipy.optimize.leastsq, + func, [1, 2, 3]) + diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_slsqp.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_slsqp.py new file mode 100644 index 0000000000000000000000000000000000000000..cab46291b91e53a6b5f55cc0185741ca966ba514 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_slsqp.py @@ -0,0 +1,608 @@ +""" +Unit test for SLSQP optimization. +""" +from numpy.testing import (assert_, assert_array_almost_equal, + assert_allclose, assert_equal) +from pytest import raises as assert_raises +import pytest +import numpy as np + +from scipy.optimize import fmin_slsqp, minimize, Bounds, NonlinearConstraint + + +class MyCallBack: + """pass a custom callback function + + This makes sure it's being used. + """ + def __init__(self): + self.been_called = False + self.ncalls = 0 + + def __call__(self, x): + self.been_called = True + self.ncalls += 1 + + +class TestSLSQP: + """ + Test SLSQP algorithm using Example 14.4 from Numerical Methods for + Engineers by Steven Chapra and Raymond Canale. + This example maximizes the function f(x) = 2*x*y + 2*x - x**2 - 2*y**2, + which has a maximum at x=2, y=1. 
+ """ + def setup_method(self): + self.opts = {'disp': False} + + def fun(self, d, sign=1.0): + """ + Arguments: + d - A list of two elements, where d[0] represents x and d[1] represents y + in the following equation. + sign - A multiplier for f. Since we want to optimize it, and the SciPy + optimizers can only minimize functions, we need to multiply it by + -1 to achieve the desired solution + Returns: + 2*x*y + 2*x - x**2 - 2*y**2 + + """ + x = d[0] + y = d[1] + return sign*(2*x*y + 2*x - x**2 - 2*y**2) + + def jac(self, d, sign=1.0): + """ + This is the derivative of fun, returning a NumPy array + representing df/dx and df/dy. + + """ + x = d[0] + y = d[1] + dfdx = sign*(-2*x + 2*y + 2) + dfdy = sign*(2*x - 4*y) + return np.array([dfdx, dfdy], float) + + def fun_and_jac(self, d, sign=1.0): + return self.fun(d, sign), self.jac(d, sign) + + def f_eqcon(self, x, sign=1.0): + """ Equality constraint """ + return np.array([x[0] - x[1]]) + + def fprime_eqcon(self, x, sign=1.0): + """ Equality constraint, derivative """ + return np.array([[1, -1]]) + + def f_eqcon_scalar(self, x, sign=1.0): + """ Scalar equality constraint """ + return self.f_eqcon(x, sign)[0] + + def fprime_eqcon_scalar(self, x, sign=1.0): + """ Scalar equality constraint, derivative """ + return self.fprime_eqcon(x, sign)[0].tolist() + + def f_ieqcon(self, x, sign=1.0): + """ Inequality constraint """ + return np.array([x[0] - x[1] - 1.0]) + + def fprime_ieqcon(self, x, sign=1.0): + """ Inequality constraint, derivative """ + return np.array([[1, -1]]) + + def f_ieqcon2(self, x): + """ Vector inequality constraint """ + return np.asarray(x) + + def fprime_ieqcon2(self, x): + """ Vector inequality constraint, derivative """ + return np.identity(x.shape[0]) + + # minimize + def test_minimize_unbounded_approximated(self): + # Minimize, method='SLSQP': unbounded, approximated jacobian. + jacs = [None, False, '2-point', '3-point'] + for jac in jacs: + res = minimize(self.fun, [-1.0, 1.0], args=(-1.0, ), + jac=jac, method='SLSQP', + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2, 1]) + + def test_minimize_unbounded_given(self): + # Minimize, method='SLSQP': unbounded, given Jacobian. + res = minimize(self.fun, [-1.0, 1.0], args=(-1.0, ), + jac=self.jac, method='SLSQP', options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2, 1]) + + def test_minimize_bounded_approximated(self): + # Minimize, method='SLSQP': bounded, approximated jacobian. + jacs = [None, False, '2-point', '3-point'] + for jac in jacs: + with np.errstate(invalid='ignore'): + res = minimize(self.fun, [-1.0, 1.0], args=(-1.0, ), + jac=jac, + bounds=((2.5, None), (None, 0.5)), + method='SLSQP', options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2.5, 0.5]) + assert_(2.5 <= res.x[0]) + assert_(res.x[1] <= 0.5) + + def test_minimize_unbounded_combined(self): + # Minimize, method='SLSQP': unbounded, combined function and Jacobian. + res = minimize(self.fun_and_jac, [-1.0, 1.0], args=(-1.0, ), + jac=True, method='SLSQP', options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2, 1]) + + def test_minimize_equality_approximated(self): + # Minimize with method='SLSQP': equality constraint, approx. jacobian. 
+ jacs = [None, False, '2-point', '3-point'] + for jac in jacs: + res = minimize(self.fun, [-1.0, 1.0], args=(-1.0, ), + jac=jac, + constraints={'type': 'eq', + 'fun': self.f_eqcon, + 'args': (-1.0, )}, + method='SLSQP', options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [1, 1]) + + def test_minimize_equality_given(self): + # Minimize with method='SLSQP': equality constraint, given Jacobian. + res = minimize(self.fun, [-1.0, 1.0], jac=self.jac, + method='SLSQP', args=(-1.0,), + constraints={'type': 'eq', 'fun':self.f_eqcon, + 'args': (-1.0, )}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [1, 1]) + + def test_minimize_equality_given2(self): + # Minimize with method='SLSQP': equality constraint, given Jacobian + # for fun and const. + res = minimize(self.fun, [-1.0, 1.0], method='SLSQP', + jac=self.jac, args=(-1.0,), + constraints={'type': 'eq', + 'fun': self.f_eqcon, + 'args': (-1.0, ), + 'jac': self.fprime_eqcon}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [1, 1]) + + def test_minimize_equality_given_cons_scalar(self): + # Minimize with method='SLSQP': scalar equality constraint, given + # Jacobian for fun and const. + res = minimize(self.fun, [-1.0, 1.0], method='SLSQP', + jac=self.jac, args=(-1.0,), + constraints={'type': 'eq', + 'fun': self.f_eqcon_scalar, + 'args': (-1.0, ), + 'jac': self.fprime_eqcon_scalar}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [1, 1]) + + def test_minimize_inequality_given(self): + # Minimize with method='SLSQP': inequality constraint, given Jacobian. + res = minimize(self.fun, [-1.0, 1.0], method='SLSQP', + jac=self.jac, args=(-1.0, ), + constraints={'type': 'ineq', + 'fun': self.f_ieqcon, + 'args': (-1.0, )}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2, 1], atol=1e-3) + + def test_minimize_inequality_given_vector_constraints(self): + # Minimize with method='SLSQP': vector inequality constraint, given + # Jacobian. + res = minimize(self.fun, [-1.0, 1.0], jac=self.jac, + method='SLSQP', args=(-1.0,), + constraints={'type': 'ineq', + 'fun': self.f_ieqcon2, + 'jac': self.fprime_ieqcon2}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [2, 1]) + + def test_minimize_bounded_constraint(self): + # when the constraint makes the solver go up against a parameter + # bound make sure that the numerical differentiation of the + # jacobian doesn't try to exceed that bound using a finite difference. + # gh11403 + def c(x): + assert 0 <= x[0] <= 1 and 0 <= x[1] <= 1, x + return x[0] ** 0.5 + x[1] + + def f(x): + assert 0 <= x[0] <= 1 and 0 <= x[1] <= 1, x + return -x[0] ** 2 + x[1] ** 2 + + cns = [NonlinearConstraint(c, 0, 1.5)] + x0 = np.asarray([0.9, 0.5]) + bnd = Bounds([0., 0.], [1.0, 1.0]) + minimize(f, x0, method='SLSQP', bounds=bnd, constraints=cns) + + def test_minimize_bound_equality_given2(self): + # Minimize with method='SLSQP': bounds, eq. const., given jac. for + # fun. and const. 
+ res = minimize(self.fun, [-1.0, 1.0], method='SLSQP', + jac=self.jac, args=(-1.0, ), + bounds=[(-0.8, 1.), (-1, 0.8)], + constraints={'type': 'eq', + 'fun': self.f_eqcon, + 'args': (-1.0, ), + 'jac': self.fprime_eqcon}, + options=self.opts) + assert_(res['success'], res['message']) + assert_allclose(res.x, [0.8, 0.8], atol=1e-3) + assert_(-0.8 <= res.x[0] <= 1) + assert_(-1 <= res.x[1] <= 0.8) + + # fmin_slsqp + def test_unbounded_approximated(self): + # SLSQP: unbounded, approximated Jacobian. + res = fmin_slsqp(self.fun, [-1.0, 1.0], args=(-1.0, ), + iprint = 0, full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [2, 1]) + + def test_unbounded_given(self): + # SLSQP: unbounded, given Jacobian. + res = fmin_slsqp(self.fun, [-1.0, 1.0], args=(-1.0, ), + fprime = self.jac, iprint = 0, + full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [2, 1]) + + def test_equality_approximated(self): + # SLSQP: equality constraint, approximated Jacobian. + res = fmin_slsqp(self.fun,[-1.0,1.0], args=(-1.0,), + eqcons = [self.f_eqcon], + iprint = 0, full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [1, 1]) + + def test_equality_given(self): + # SLSQP: equality constraint, given Jacobian. + res = fmin_slsqp(self.fun, [-1.0, 1.0], + fprime=self.jac, args=(-1.0,), + eqcons = [self.f_eqcon], iprint = 0, + full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [1, 1]) + + def test_equality_given2(self): + # SLSQP: equality constraint, given Jacobian for fun and const. + res = fmin_slsqp(self.fun, [-1.0, 1.0], + fprime=self.jac, args=(-1.0,), + f_eqcons = self.f_eqcon, + fprime_eqcons = self.fprime_eqcon, + iprint = 0, + full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [1, 1]) + + def test_inequality_given(self): + # SLSQP: inequality constraint, given Jacobian. + res = fmin_slsqp(self.fun, [-1.0, 1.0], + fprime=self.jac, args=(-1.0, ), + ieqcons = [self.f_ieqcon], + iprint = 0, full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [2, 1], decimal=3) + + def test_bound_equality_given2(self): + # SLSQP: bounds, eq. const., given jac. for fun. and const. + res = fmin_slsqp(self.fun, [-1.0, 1.0], + fprime=self.jac, args=(-1.0, ), + bounds = [(-0.8, 1.), (-1, 0.8)], + f_eqcons = self.f_eqcon, + fprime_eqcons = self.fprime_eqcon, + iprint = 0, full_output = 1) + x, fx, its, imode, smode = res + assert_(imode == 0, imode) + assert_array_almost_equal(x, [0.8, 0.8], decimal=3) + assert_(-0.8 <= x[0] <= 1) + assert_(-1 <= x[1] <= 0.8) + + def test_scalar_constraints(self): + # Regression test for gh-2182 + x = fmin_slsqp(lambda z: z**2, [3.], + ieqcons=[lambda z: z[0] - 1], + iprint=0) + assert_array_almost_equal(x, [1.]) + + x = fmin_slsqp(lambda z: z**2, [3.], + f_ieqcons=lambda z: [z[0] - 1], + iprint=0) + assert_array_almost_equal(x, [1.]) + + def test_integer_bounds(self): + # This should not raise an exception + fmin_slsqp(lambda z: z**2 - 1, [0], bounds=[[0, 1]], iprint=0) + + def test_array_bounds(self): + # NumPy used to treat n-dimensional 1-element arrays as scalars + # in some cases. The handling of `bounds` by `fmin_slsqp` still + # supports this behavior. 
+ bounds = [(-np.inf, np.inf), (np.array([2]), np.array([3]))] + x = fmin_slsqp(lambda z: np.sum(z**2 - 1), [2.5, 2.5], bounds=bounds, + iprint=0) + assert_array_almost_equal(x, [0, 2]) + + def test_obj_must_return_scalar(self): + # Regression test for Github Issue #5433 + # If objective function does not return a scalar, raises ValueError + with assert_raises(ValueError): + fmin_slsqp(lambda x: [0, 1], [1, 2, 3]) + + def test_obj_returns_scalar_in_list(self): + # Test for Github Issue #5433 and PR #6691 + # Objective function should be able to return length-1 Python list + # containing the scalar + fmin_slsqp(lambda x: [0], [1, 2, 3], iprint=0) + + def test_callback(self): + # Minimize, method='SLSQP': unbounded, approximated jacobian. Check for callback + callback = MyCallBack() + res = minimize(self.fun, [-1.0, 1.0], args=(-1.0, ), + method='SLSQP', callback=callback, options=self.opts) + assert_(res['success'], res['message']) + assert_(callback.been_called) + assert_equal(callback.ncalls, res['nit']) + + def test_inconsistent_linearization(self): + # SLSQP must be able to solve this problem, even if the + # linearized problem at the starting point is infeasible. + + # Linearized constraints are + # + # 2*x0[0]*x[0] >= 1 + # + # At x0 = [0, 1], the second constraint is clearly infeasible. + # This triggers a call with n2==1 in the LSQ subroutine. + x = [0, 1] + def f1(x): + return x[0] + x[1] - 2 + def f2(x): + return x[0] ** 2 - 1 + sol = minimize( + lambda x: x[0]**2 + x[1]**2, + x, + constraints=({'type':'eq','fun': f1}, + {'type':'ineq','fun': f2}), + bounds=((0,None), (0,None)), + method='SLSQP') + x = sol.x + + assert_allclose(f1(x), 0, atol=1e-8) + assert_(f2(x) >= -1e-8) + assert_(sol.success, sol) + + def test_regression_5743(self): + # SLSQP must not indicate success for this problem, + # which is infeasible. + x = [1, 2] + sol = minimize( + lambda x: x[0]**2 + x[1]**2, + x, + constraints=({'type':'eq','fun': lambda x: x[0]+x[1]-1}, + {'type':'ineq','fun': lambda x: x[0]-2}), + bounds=((0,None), (0,None)), + method='SLSQP') + assert_(not sol.success, sol) + + def test_gh_6676(self): + def func(x): + return (x[0] - 1)**2 + 2*(x[1] - 1)**2 + 0.5*(x[2] - 1)**2 + + sol = minimize(func, [0, 0, 0], method='SLSQP') + assert_(sol.jac.shape == (3,)) + + def test_invalid_bounds(self): + # Raise correct error when lower bound is greater than upper bound. + # See Github issue 6875. 
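# Editor's note (illustrative, not part of the diff): the callback contract
# test_callback pins down above — SLSQP invokes callback(xk) once per
# iteration, so a counting callback ends up equal to res.nit.
from scipy.optimize import minimize, rosen

class CountingCallback:
    def __init__(self):
        self.ncalls, self.been_called = 0, False
    def __call__(self, xk):
        self.ncalls += 1
        self.been_called = True

cb = CountingCallback()
res = minimize(rosen, [1.3, 0.7], method='SLSQP', callback=cb)
assert cb.been_called and cb.ncalls == res.nit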
+ bounds_list = [ + ((1, 2), (2, 1)), + ((2, 1), (1, 2)), + ((2, 1), (2, 1)), + ((np.inf, 0), (np.inf, 0)), + ((1, -np.inf), (0, 1)), + ] + for bounds in bounds_list: + with assert_raises(ValueError): + minimize(self.fun, [-1.0, 1.0], bounds=bounds, method='SLSQP') + + def test_bounds_clipping(self): + # + # SLSQP returns bogus results for initial guess out of bounds, gh-6859 + # + def f(x): + return (x[0] - 1)**2 + + sol = minimize(f, [10], method='slsqp', bounds=[(None, 0)]) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [-10], method='slsqp', bounds=[(2, None)]) + assert_(sol.success) + assert_allclose(sol.x, 2, atol=1e-10) + + sol = minimize(f, [-10], method='slsqp', bounds=[(None, 0)]) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [10], method='slsqp', bounds=[(2, None)]) + assert_(sol.success) + assert_allclose(sol.x, 2, atol=1e-10) + + sol = minimize(f, [-0.5], method='slsqp', bounds=[(-1, 0)]) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [10], method='slsqp', bounds=[(-1, 0)]) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + def test_infeasible_initial(self): + # Check SLSQP behavior with infeasible initial point + def f(x): + x, = x + return x*x - 2*x + 1 + + cons_u = [{'type': 'ineq', 'fun': lambda x: 0 - x}] + cons_l = [{'type': 'ineq', 'fun': lambda x: x - 2}] + cons_ul = [{'type': 'ineq', 'fun': lambda x: 0 - x}, + {'type': 'ineq', 'fun': lambda x: x + 1}] + + sol = minimize(f, [10], method='slsqp', constraints=cons_u) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [-10], method='slsqp', constraints=cons_l) + assert_(sol.success) + assert_allclose(sol.x, 2, atol=1e-10) + + sol = minimize(f, [-10], method='slsqp', constraints=cons_u) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [10], method='slsqp', constraints=cons_l) + assert_(sol.success) + assert_allclose(sol.x, 2, atol=1e-10) + + sol = minimize(f, [-0.5], method='slsqp', constraints=cons_ul) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + sol = minimize(f, [10], method='slsqp', constraints=cons_ul) + assert_(sol.success) + assert_allclose(sol.x, 0, atol=1e-10) + + def test_inconsistent_inequalities(self): + # gh-7618 + + def cost(x): + return -1 * x[0] + 4 * x[1] + + def ineqcons1(x): + return x[1] - x[0] - 1 + + def ineqcons2(x): + return x[0] - x[1] + + # The inequalities are inconsistent, so no solution can exist: + # + # x1 >= x0 + 1 + # x0 >= x1 + + x0 = (1,5) + bounds = ((-5, 5), (-5, 5)) + cons = (dict(type='ineq', fun=ineqcons1), dict(type='ineq', fun=ineqcons2)) + res = minimize(cost, x0, method='SLSQP', bounds=bounds, constraints=cons) + + assert_(not res.success) + + def test_new_bounds_type(self): + def f(x): + return x[0] ** 2 + x[1] ** 2 + bounds = Bounds([1, 0], [np.inf, np.inf]) + sol = minimize(f, [0, 0], method='slsqp', bounds=bounds) + assert_(sol.success) + assert_allclose(sol.x, [1, 0]) + + def test_nested_minimization(self): + + class NestedProblem: + + def __init__(self): + self.F_outer_count = 0 + + def F_outer(self, x): + self.F_outer_count += 1 + if self.F_outer_count > 1000: + raise Exception("Nested minimization failed to terminate.") + inner_res = minimize(self.F_inner, (3, 4), method="SLSQP") + assert_(inner_res.success) + assert_allclose(inner_res.x, [1, 1]) + return x[0]**2 + x[1]**2 + x[2]**2 + + def F_inner(self, x): + return (x[0] - 1)**2 + (x[1] - 1)**2 + + 
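# Editor's note (illustrative, not part of the diff): the gh-6859 behaviour
# test_bounds_clipping pins down above — an initial guess outside the bounds
# must not produce bogus results; SLSQP is expected to clip x0 and still
# converge to the constrained optimum at the active bound.
from numpy.testing import assert_allclose
from scipy.optimize import minimize

res = minimize(lambda x: (x[0] - 1)**2, [10.0], method='SLSQP',
               bounds=[(None, 0)])
assert res.success
assert_allclose(res.x, 0, atol=1e-10)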
def solve(self): + outer_res = minimize(self.F_outer, (5, 5, 5), method="SLSQP") + assert_(outer_res.success) + assert_allclose(outer_res.x, [0, 0, 0]) + + problem = NestedProblem() + problem.solve() + + def test_gh1758(self): + # the test suggested in gh1758 + # https://nlopt.readthedocs.io/en/latest/NLopt_Tutorial/ + # implement two equality constraints, in R^2. + def fun(x): + return np.sqrt(x[1]) + + def f_eqcon(x): + """ Equality constraint """ + return x[1] - (2 * x[0]) ** 3 + + def f_eqcon2(x): + """ Equality constraint """ + return x[1] - (-x[0] + 1) ** 3 + + c1 = {'type': 'eq', 'fun': f_eqcon} + c2 = {'type': 'eq', 'fun': f_eqcon2} + + res = minimize(fun, [8, 0.25], method='SLSQP', + constraints=[c1, c2], bounds=[(-0.5, 1), (0, 8)]) + + np.testing.assert_allclose(res.fun, 0.5443310539518) + np.testing.assert_allclose(res.x, [0.33333333, 0.2962963]) + assert res.success + + def test_gh9640(self): + np.random.seed(10) + cons = ({'type': 'ineq', 'fun': lambda x: -x[0] - x[1] - 3}, + {'type': 'ineq', 'fun': lambda x: x[1] + x[2] - 2}) + bnds = ((-2, 2), (-2, 2), (-2, 2)) + + def target(x): + return 1 + x0 = [-1.8869783504471584, -0.640096352696244, -0.8174212253407696] + res = minimize(target, x0, method='SLSQP', bounds=bnds, constraints=cons, + options={'disp':False, 'maxiter':10000}) + + # The problem is infeasible, so it cannot succeed + assert not res.success + + def test_parameters_stay_within_bounds(self): + # gh11403. For some problems the SLSQP Fortran code suggests a step + # outside one of the lower/upper bounds. When this happens + # approx_derivative complains because it's being asked to evaluate + # a gradient outside its domain. + np.random.seed(1) + bounds = Bounds(np.array([0.1]), np.array([1.0])) + n_inputs = len(bounds.lb) + x0 = np.array(bounds.lb + (bounds.ub - bounds.lb) * + np.random.random(n_inputs)) + + def f(x): + assert (x >= bounds.lb).all() + return np.linalg.norm(x) + + with pytest.warns(RuntimeWarning, match='x were outside bounds'): + res = minimize(f, x0, method='SLSQP', bounds=bounds) + assert res.success diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_tnc.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_tnc.py new file mode 100644 index 0000000000000000000000000000000000000000..2cde9837bfd08e62916660a9750d833629b6b547 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_tnc.py @@ -0,0 +1,345 @@ +""" +Unit tests for TNC optimization routine from tnc.py +""" +import pytest +from numpy.testing import assert_allclose, assert_equal + +import numpy as np +from math import pow + +from scipy import optimize + + +class TestTnc: + """TNC non-linear optimization. + + These tests are taken from Prof. K. Schittkowski's test examples + for constrained non-linear programming. 
+ + http://www.uni-bayreuth.de/departments/math/~kschittkowski/home.htm + + """ + def setup_method(self): + # options for minimize + self.opts = {'disp': False, 'maxfun': 200} + + # objective functions and Jacobian for each test + def f1(self, x, a=100.0): + return a * pow((x[1] - pow(x[0], 2)), 2) + pow(1.0 - x[0], 2) + + def g1(self, x, a=100.0): + dif = [0, 0] + dif[1] = 2 * a * (x[1] - pow(x[0], 2)) + dif[0] = -2.0 * (x[0] * (dif[1] - 1.0) + 1.0) + return dif + + def fg1(self, x, a=100.0): + return self.f1(x, a), self.g1(x, a) + + def f3(self, x): + return x[1] + pow(x[1] - x[0], 2) * 1.0e-5 + + def g3(self, x): + dif = [0, 0] + dif[0] = -2.0 * (x[1] - x[0]) * 1.0e-5 + dif[1] = 1.0 - dif[0] + return dif + + def fg3(self, x): + return self.f3(x), self.g3(x) + + def f4(self, x): + return pow(x[0] + 1.0, 3) / 3.0 + x[1] + + def g4(self, x): + dif = [0, 0] + dif[0] = pow(x[0] + 1.0, 2) + dif[1] = 1.0 + return dif + + def fg4(self, x): + return self.f4(x), self.g4(x) + + def f5(self, x): + return np.sin(x[0] + x[1]) + pow(x[0] - x[1], 2) - \ + 1.5 * x[0] + 2.5 * x[1] + 1.0 + + def g5(self, x): + dif = [0, 0] + v1 = np.cos(x[0] + x[1]) + v2 = 2.0*(x[0] - x[1]) + + dif[0] = v1 + v2 - 1.5 + dif[1] = v1 - v2 + 2.5 + return dif + + def fg5(self, x): + return self.f5(x), self.g5(x) + + def f38(self, x): + return (100.0 * pow(x[1] - pow(x[0], 2), 2) + + pow(1.0 - x[0], 2) + 90.0 * pow(x[3] - pow(x[2], 2), 2) + + pow(1.0 - x[2], 2) + 10.1 * (pow(x[1] - 1.0, 2) + + pow(x[3] - 1.0, 2)) + + 19.8 * (x[1] - 1.0) * (x[3] - 1.0)) * 1.0e-5 + + def g38(self, x): + dif = [0, 0, 0, 0] + dif[0] = (-400.0 * x[0] * (x[1] - pow(x[0], 2)) - + 2.0 * (1.0 - x[0])) * 1.0e-5 + dif[1] = (200.0 * (x[1] - pow(x[0], 2)) + 20.2 * (x[1] - 1.0) + + 19.8 * (x[3] - 1.0)) * 1.0e-5 + dif[2] = (- 360.0 * x[2] * (x[3] - pow(x[2], 2)) - + 2.0 * (1.0 - x[2])) * 1.0e-5 + dif[3] = (180.0 * (x[3] - pow(x[2], 2)) + 20.2 * (x[3] - 1.0) + + 19.8 * (x[1] - 1.0)) * 1.0e-5 + return dif + + def fg38(self, x): + return self.f38(x), self.g38(x) + + def f45(self, x): + return 2.0 - x[0] * x[1] * x[2] * x[3] * x[4] / 120.0 + + def g45(self, x): + dif = [0] * 5 + dif[0] = - x[1] * x[2] * x[3] * x[4] / 120.0 + dif[1] = - x[0] * x[2] * x[3] * x[4] / 120.0 + dif[2] = - x[0] * x[1] * x[3] * x[4] / 120.0 + dif[3] = - x[0] * x[1] * x[2] * x[4] / 120.0 + dif[4] = - x[0] * x[1] * x[2] * x[3] / 120.0 + return dif + + def fg45(self, x): + return self.f45(x), self.g45(x) + + # tests + # minimize with method=TNC + def test_minimize_tnc1(self): + x0, bnds = [-2, 1], ([-np.inf, None], [-1.5, None]) + xopt = [1, 1] + iterx = [] # to test callback + + res = optimize.minimize(self.f1, x0, method='TNC', jac=self.g1, + bounds=bnds, options=self.opts, + callback=iterx.append) + assert_allclose(res.fun, self.f1(xopt), atol=1e-8) + assert_equal(len(iterx), res.nit) + + def test_minimize_tnc1b(self): + x0, bnds = np.array([-2, 1]), ([-np.inf, None], [-1.5, None]) + xopt = [1, 1] + x = optimize.minimize(self.f1, x0, method='TNC', + bounds=bnds, options=self.opts).x + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-4) + + def test_minimize_tnc1c(self): + x0, bnds = [-2, 1], ([-np.inf, None],[-1.5, None]) + xopt = [1, 1] + x = optimize.minimize(self.fg1, x0, method='TNC', + jac=True, bounds=bnds, + options=self.opts).x + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-8) + + def test_minimize_tnc2(self): + x0, bnds = [-2, 1], ([-np.inf, None], [1.5, None]) + xopt = [-1.2210262419616387, 1.5] + x = optimize.minimize(self.f1, x0, method='TNC', + jac=self.g1, 
bounds=bnds, + options=self.opts).x + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-8) + + def test_minimize_tnc3(self): + x0, bnds = [10, 1], ([-np.inf, None], [0.0, None]) + xopt = [0, 0] + x = optimize.minimize(self.f3, x0, method='TNC', + jac=self.g3, bounds=bnds, + options=self.opts).x + assert_allclose(self.f3(x), self.f3(xopt), atol=1e-8) + + def test_minimize_tnc4(self): + x0,bnds = [1.125, 0.125], [(1, None), (0, None)] + xopt = [1, 0] + x = optimize.minimize(self.f4, x0, method='TNC', + jac=self.g4, bounds=bnds, + options=self.opts).x + assert_allclose(self.f4(x), self.f4(xopt), atol=1e-8) + + def test_minimize_tnc5(self): + x0, bnds = [0, 0], [(-1.5, 4),(-3, 3)] + xopt = [-0.54719755119659763, -1.5471975511965976] + x = optimize.minimize(self.f5, x0, method='TNC', + jac=self.g5, bounds=bnds, + options=self.opts).x + assert_allclose(self.f5(x), self.f5(xopt), atol=1e-8) + + def test_minimize_tnc38(self): + x0, bnds = np.array([-3, -1, -3, -1]), [(-10, 10)]*4 + xopt = [1]*4 + x = optimize.minimize(self.f38, x0, method='TNC', + jac=self.g38, bounds=bnds, + options=self.opts).x + assert_allclose(self.f38(x), self.f38(xopt), atol=1e-8) + + def test_minimize_tnc45(self): + x0, bnds = [2] * 5, [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)] + xopt = [1, 2, 3, 4, 5] + x = optimize.minimize(self.f45, x0, method='TNC', + jac=self.g45, bounds=bnds, + options=self.opts).x + assert_allclose(self.f45(x), self.f45(xopt), atol=1e-8) + + # fmin_tnc + def test_tnc1(self): + fg, x, bounds = self.fg1, [-2, 1], ([-np.inf, None], [-1.5, None]) + xopt = [1, 1] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, args=(100.0, ), + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc1b(self): + x, bounds = [-2, 1], ([-np.inf, None], [-1.5, None]) + xopt = [1, 1] + + x, nf, rc = optimize.fmin_tnc(self.f1, x, approx_grad=True, + bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-4, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc1c(self): + x, bounds = [-2, 1], ([-np.inf, None], [-1.5, None]) + xopt = [1, 1] + + x, nf, rc = optimize.fmin_tnc(self.f1, x, fprime=self.g1, + bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc2(self): + fg, x, bounds = self.fg1, [-2, 1], ([-np.inf, None], [1.5, None]) + xopt = [-1.2210262419616387, 1.5] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f1(x), self.f1(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc3(self): + fg, x, bounds = self.fg3, [10, 1], ([-np.inf, None], [0.0, None]) + xopt = [0, 0] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f3(x), self.f3(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc4(self): + fg, x, bounds = self.fg4, [1.125, 0.125], [(1, None), (0, None)] + xopt = [1, 0] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f4(x), self.f4(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + 
optimize._tnc.RCSTRINGS[rc]) + + def test_tnc5(self): + fg, x, bounds = self.fg5, [0, 0], [(-1.5, 4),(-3, 3)] + xopt = [-0.54719755119659763, -1.5471975511965976] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f5(x), self.f5(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc38(self): + fg, x, bounds = self.fg38, np.array([-3, -1, -3, -1]), [(-10, 10)]*4 + xopt = [1]*4 + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f38(x), self.f38(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_tnc45(self): + fg, x, bounds = self.fg45, [2] * 5, [(0, 1), (0, 2), (0, 3), + (0, 4), (0, 5)] + xopt = [1, 2, 3, 4, 5] + + x, nf, rc = optimize.fmin_tnc(fg, x, bounds=bounds, + messages=optimize._tnc.MSG_NONE, + maxfun=200) + + assert_allclose(self.f45(x), self.f45(xopt), atol=1e-8, + err_msg="TNC failed with status: " + + optimize._tnc.RCSTRINGS[rc]) + + def test_raising_exceptions(self): + # tnc was ported to cython from hand-crafted cpython code + # check that Exception handling works. + def myfunc(x): + raise RuntimeError("myfunc") + + def myfunc1(x): + return optimize.rosen(x) + + def callback(x): + raise ValueError("callback") + + with pytest.raises(RuntimeError): + optimize.minimize(myfunc, [0, 1], method="TNC") + + with pytest.raises(ValueError): + optimize.minimize( + myfunc1, [0, 1], method="TNC", callback=callback + ) + + def test_callback_shouldnt_affect_minimization(self): + # gh14879. The output of a TNC minimization was different depending + # on whether a callback was used or not. The two should be equivalent. + # The issue was that TNC was unscaling/scaling x, and this process was + # altering x in the process. Now the callback uses an unscaled + # temporary copy of x. + def callback(x): + pass + + fun = optimize.rosen + bounds = [(0, 10)] * 4 + x0 = [1, 2, 3, 4.] + res = optimize.minimize( + fun, x0, bounds=bounds, method="TNC", options={"maxfun": 1000} + ) + res2 = optimize.minimize( + fun, x0, bounds=bounds, method="TNC", options={"maxfun": 1000}, + callback=callback + ) + assert_allclose(res2.x, res.x) + assert_allclose(res2.fun, res.fun) + assert_equal(res2.nfev, res.nfev) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion.py new file mode 100644 index 0000000000000000000000000000000000000000..24663f18de817a77a9e63c295b5a2a453115d101 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion.py @@ -0,0 +1,112 @@ +""" +Unit tests for trust-region optimization routines. 
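# Editor's note (hedged sketch, not part of the diff): the two TNC entry
# points this file exercises. minimize(..., method='TNC') accepts jac=True for
# a combined fun-and-gradient callable (the fg* pattern above); legacy
# fmin_tnc also accepts such a callable and returns the 3-tuple
# (x, nfeval, rc), with rc indexing optimize._tnc.RCSTRINGS for a readable
# status.
import numpy as np
from scipy import optimize

def fg(x):  # Rosenbrock value and gradient in one call
    f = 100.0 * (x[1] - x[0]**2)**2 + (1.0 - x[0])**2
    g = np.array([-400.0 * x[0] * (x[1] - x[0]**2) - 2.0 * (1.0 - x[0]),
                  200.0 * (x[1] - x[0]**2)])
    return f, g

res = optimize.minimize(fg, [-2.0, 1.0], method='TNC', jac=True,
                        bounds=[(-np.inf, None), (-1.5, None)])

x, nfeval, rc = optimize.fmin_tnc(fg, [-2.0, 1.0],
                                  bounds=[(-np.inf, None), (-1.5, None)],
                                  messages=0)  # messages=0 silences output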
+ +To run it in its simplest form:: + nosetests test_optimize.py + +""" +import pytest +import numpy as np +from numpy.testing import assert_, assert_equal, assert_allclose +from scipy.optimize import (minimize, rosen, rosen_der, rosen_hess, + rosen_hess_prod) + + +class Accumulator: + """ This is for testing callbacks.""" + def __init__(self): + self.count = 0 + self.accum = None + + def __call__(self, x): + self.count += 1 + if self.accum is None: + self.accum = np.array(x) + else: + self.accum += x + + +class TestTrustRegionSolvers: + + def setup_method(self): + self.x_opt = [1.0, 1.0] + self.easy_guess = [2.0, 2.0] + self.hard_guess = [-1.2, 1.0] + + def test_dogleg_accuracy(self): + # test the accuracy and the return_all option + x0 = self.hard_guess + r = minimize(rosen, x0, jac=rosen_der, hess=rosen_hess, tol=1e-8, + method='dogleg', options={'return_all': True},) + assert_allclose(x0, r['allvecs'][0]) + assert_allclose(r['x'], r['allvecs'][-1]) + assert_allclose(r['x'], self.x_opt) + + def test_dogleg_callback(self): + # test the callback mechanism and the maxiter and return_all options + accumulator = Accumulator() + maxiter = 5 + r = minimize(rosen, self.hard_guess, jac=rosen_der, hess=rosen_hess, + callback=accumulator, method='dogleg', + options={'return_all': True, 'maxiter': maxiter},) + assert_equal(accumulator.count, maxiter) + assert_equal(len(r['allvecs']), maxiter+1) + assert_allclose(r['x'], r['allvecs'][-1]) + assert_allclose(sum(r['allvecs'][1:]), accumulator.accum) + + def test_dogleg_user_warning(self): + with pytest.warns(RuntimeWarning, + match=r'Maximum number of iterations'): + minimize(rosen, self.hard_guess, jac=rosen_der, + hess=rosen_hess, method='dogleg', + options={'disp': True, 'maxiter': 1}, ) + + def test_solver_concordance(self): + # Assert that dogleg uses fewer iterations than ncg on the Rosenbrock + # test function, although this does not necessarily mean + # that dogleg is faster or better than ncg even for this function + # and especially not for other test functions. 
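# Editor's note (illustrative sketch, not part of the diff): the dogleg call
# shape the tests above exercise — analytic jac and hess are mandatory for
# method='dogleg', and options={'return_all': True} records every iterate in
# res.allvecs (allvecs[0] == x0, allvecs[-1] == res.x).
from numpy.testing import assert_allclose
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

res = minimize(rosen, [-1.2, 1.0], jac=rosen_der, hess=rosen_hess,
               method='dogleg', tol=1e-8, options={'return_all': True})
assert_allclose(res.x, [1.0, 1.0])
assert_allclose(res.allvecs[-1], res.x)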
+ f = rosen + g = rosen_der + h = rosen_hess + for x0 in (self.easy_guess, self.hard_guess): + r_dogleg = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='dogleg', options={'return_all': True}) + r_trust_ncg = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='trust-ncg', + options={'return_all': True}) + r_trust_krylov = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='trust-krylov', + options={'return_all': True}) + r_ncg = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='newton-cg', options={'return_all': True}) + r_iterative = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='trust-exact', + options={'return_all': True}) + assert_allclose(self.x_opt, r_dogleg['x']) + assert_allclose(self.x_opt, r_trust_ncg['x']) + assert_allclose(self.x_opt, r_trust_krylov['x']) + assert_allclose(self.x_opt, r_ncg['x']) + assert_allclose(self.x_opt, r_iterative['x']) + assert_(len(r_dogleg['allvecs']) < len(r_ncg['allvecs'])) + + def test_trust_ncg_hessp(self): + for x0 in (self.easy_guess, self.hard_guess, self.x_opt): + r = minimize(rosen, x0, jac=rosen_der, hessp=rosen_hess_prod, + tol=1e-8, method='trust-ncg') + assert_allclose(self.x_opt, r['x']) + + def test_trust_ncg_start_in_optimum(self): + r = minimize(rosen, x0=self.x_opt, jac=rosen_der, hess=rosen_hess, + tol=1e-8, method='trust-ncg') + assert_allclose(self.x_opt, r['x']) + + def test_trust_krylov_start_in_optimum(self): + r = minimize(rosen, x0=self.x_opt, jac=rosen_der, hess=rosen_hess, + tol=1e-8, method='trust-krylov') + assert_allclose(self.x_opt, r['x']) + + def test_trust_exact_start_in_optimum(self): + r = minimize(rosen, x0=self.x_opt, jac=rosen_der, hess=rosen_hess, + tol=1e-8, method='trust-exact') + assert_allclose(self.x_opt, r['x']) diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_exact.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_exact.py new file mode 100644 index 0000000000000000000000000000000000000000..42c649218078d7dcf052f2557f5be23e5f5e23fc --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_exact.py @@ -0,0 +1,354 @@ +""" +Unit tests for trust-region iterative subproblem. + +To run it in its simplest form:: + nosetests test_optimize.py + +""" +import pytest +import numpy as np +from scipy.optimize._trustregion_exact import ( + estimate_smallest_singular_value, + singular_leading_submatrix, + IterativeSubproblem) +from scipy.linalg import (svd, get_lapack_funcs, det, qr, norm) +from numpy.testing import (assert_array_equal, + assert_equal, assert_array_almost_equal) + + +def random_entry(n, min_eig, max_eig, case): + + # Generate random matrix + rand = np.random.uniform(-1, 1, (n, n)) + + # QR decomposition + Q, _, _ = qr(rand, pivoting='True') + + # Generate random eigenvalues + eigvalues = np.random.uniform(min_eig, max_eig, n) + eigvalues = np.sort(eigvalues)[::-1] + + # Generate matrix + Qaux = np.multiply(eigvalues, Q) + A = np.dot(Qaux, Q.T) + + # Generate gradient vector accordingly + # to the case is being tested. 
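# Editor's note (sketch, not part of the diff): the Hessian-vector-product
# variant test_trust_ncg_hessp relies on above — hessp(x, p) replaces a full
# hess callable, which is what makes trust-ncg usable when H is too large to
# form explicitly.
from numpy.testing import assert_allclose
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess_prod

res = minimize(rosen, [2.0, 2.0], jac=rosen_der, hessp=rosen_hess_prod,
               tol=1e-8, method='trust-ncg')
assert_allclose(res.x, [1.0, 1.0])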
+ if case == 'hard': + g = np.zeros(n) + g[:-1] = np.random.uniform(-1, 1, n-1) + g = np.dot(Q, g) + elif case == 'jac_equal_zero': + g = np.zeros(n) + else: + g = np.random.uniform(-1, 1, n) + + return A, g + + +class TestEstimateSmallestSingularValue: + + def test_for_ill_condiotioned_matrix(self): + + # Ill-conditioned triangular matrix + C = np.array([[1, 2, 3, 4], + [0, 0.05, 60, 7], + [0, 0, 0.8, 9], + [0, 0, 0, 10]]) + + # Get svd decomposition + U, s, Vt = svd(C) + + # Get smallest singular value and correspondent right singular vector. + smin_svd = s[-1] + zmin_svd = Vt[-1, :] + + # Estimate smallest singular value + smin, zmin = estimate_smallest_singular_value(C) + + # Check the estimation + assert_array_almost_equal(smin, smin_svd, decimal=8) + assert_array_almost_equal(abs(zmin), abs(zmin_svd), decimal=8) + + +class TestSingularLeadingSubmatrix: + + def test_for_already_singular_leading_submatrix(self): + + # Define test matrix A. + # Note that the leading 2x2 submatrix is singular. + A = np.array([[1, 2, 3], + [2, 4, 5], + [3, 5, 6]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular. + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfil the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + def test_for_simetric_indefinite_matrix(self): + + # Define test matrix A. + # Note that the leading 5x5 submatrix is indefinite. + A = np.asarray([[1, 2, 3, 7, 8], + [2, 5, 5, 9, 0], + [3, 5, 11, 1, 2], + [7, 9, 1, 7, 5], + [8, 0, 2, 5, 8]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular. + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfil the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + def test_for_first_element_equal_to_zero(self): + + # Define test matrix A. + # Note that the leading 2x2 submatrix is singular. + A = np.array([[0, 3, 11], + [3, 12, 5], + [11, 5, 6]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfil the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + +class TestIterativeSubproblem: + + def test_for_the_easy_case(self): + + # `H` is chosen such that `g` is not orthogonal to the + # eigenvector associated with the smallest eigenvalue `s`. 
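# Editor's note (hedged sketch, not part of the diff): the LAPACK potrf
# pattern the three TestSingularLeadingSubmatrix tests above share. On a
# matrix that is not positive definite, potrf reports info = k > 0, meaning
# the factorization broke down at the k-th pivot — the k that
# singular_leading_submatrix() consumes.
import numpy as np
from scipy.linalg import get_lapack_funcs

A = np.array([[1., 2., 3.],
              [2., 4., 5.],
              [3., 5., 6.]])          # leading 2x2 block is singular
potrf, = get_lapack_funcs(('potrf',), (A,))
c, k = potrf(A, lower=False, overwrite_a=False, clean=True)
assert k == 2                          # Cholesky fails at the 2nd pivot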
+ H = [[10, 2, 3, 4], + [2, 1, 7, 1], + [3, 7, 1, 7], + [4, 1, 7, 2]] + g = [1, 1, 1, 1] + + # Trust Radius + trust_radius = 1 + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, [0.00393332, -0.55260862, + 0.67065477, -0.49480341]) + assert_array_almost_equal(hits_boundary, True) + + def test_for_the_hard_case(self): + + # `H` is chosen such that `g` is orthogonal to the + # eigenvector associated with the smallest eigenvalue `s`. + H = [[10, 2, 3, 4], + [2, 1, 7, 1], + [3, 7, 1, 7], + [4, 1, 7, 2]] + g = [6.4852641521327437, 1, 1, 1] + s = -8.2151519874416614 + + # Trust Radius + trust_radius = 1 + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(-s, subprob.lambda_current) + + def test_for_interior_convergence(self): + + H = [[1.812159, 0.82687265, 0.21838879, -0.52487006, 0.25436988], + [0.82687265, 2.66380283, 0.31508988, -0.40144163, 0.08811588], + [0.21838879, 0.31508988, 2.38020726, -0.3166346, 0.27363867], + [-0.52487006, -0.40144163, -0.3166346, 1.61927182, -0.42140166], + [0.25436988, 0.08811588, 0.27363867, -0.42140166, 1.33243101]] + + g = [0.75798952, 0.01421945, 0.33847612, 0.83725004, -0.47909534] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H)) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [-0.68585435, 0.1222621, -0.22090999, + -0.67005053, 0.31586769]) + assert_array_almost_equal(hits_boundary, False) + assert_array_almost_equal(subprob.lambda_current, 0) + assert_array_almost_equal(subprob.niter, 1) + + def test_for_jac_equal_zero(self): + + H = [[0.88547534, 2.90692271, 0.98440885, -0.78911503, -0.28035809], + [2.90692271, -0.04618819, 0.32867263, -0.83737945, 0.17116396], + [0.98440885, 0.32867263, -0.87355957, -0.06521957, -1.43030957], + [-0.78911503, -0.83737945, -0.06521957, -1.645709, -0.33887298], + [-0.28035809, 0.17116396, -1.43030957, -0.33887298, -1.68586978]] + + g = [0, 0, 0, 0, 0] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [0.06910534, -0.01432721, + -0.65311947, -0.23815972, + -0.84954934]) + assert_array_almost_equal(hits_boundary, True) + + def test_for_jac_very_close_to_zero(self): + + H = [[0.88547534, 2.90692271, 0.98440885, -0.78911503, -0.28035809], + [2.90692271, -0.04618819, 0.32867263, -0.83737945, 0.17116396], + [0.98440885, 0.32867263, -0.87355957, -0.06521957, -1.43030957], + [-0.78911503, -0.83737945, -0.06521957, -1.645709, -0.33887298], + [-0.28035809, 0.17116396, -1.43030957, -0.33887298, -1.68586978]] + + g = [0, 0, 0, 0, 1e-15] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [0.06910534, -0.01432721, + -0.65311947, -0.23815972, + -0.84954934]) + assert_array_almost_equal(hits_boundary, True) + + @pytest.mark.fail_slow(5) + def 
test_for_random_entries(self): + # Seed + np.random.seed(1) + + # Dimension + n = 5 + + for case in ('easy', 'hard', 'jac_equal_zero'): + + eig_limits = [(-20, -15), + (-10, -5), + (-10, 0), + (-5, 5), + (-10, 10), + (0, 10), + (5, 10), + (15, 20)] + + for min_eig, max_eig in eig_limits: + # Generate random symmetric matrix H with + # eigenvalues between min_eig and max_eig. + H, g = random_entry(n, min_eig, max_eig, case) + + # Trust radius + trust_radius_list = [0.1, 0.3, 0.6, 0.8, 1, 1.2, 3.3, 5.5, 10] + + for trust_radius in trust_radius_list: + # Solve subproblem with very high accuracy + subprob_ac = IterativeSubproblem(0, + lambda x: 0, + lambda x: g, + lambda x: H, + k_easy=1e-10, + k_hard=1e-10) + + p_ac, hits_boundary_ac = subprob_ac.solve(trust_radius) + + # Compute objective function value + J_ac = 1/2*np.dot(p_ac, np.dot(H, p_ac))+np.dot(g, p_ac) + + stop_criteria = [(0.1, 2), + (0.5, 1.1), + (0.9, 1.01)] + + for k_opt, k_trf in stop_criteria: + + # k_easy and k_hard computed in function + # of k_opt and k_trf accordingly to + # Conn, A. R., Gould, N. I., & Toint, P. L. (2000). + # "Trust region methods". Siam. p. 197. + k_easy = min(k_trf-1, + 1-np.sqrt(k_opt)) + k_hard = 1-k_opt + + # Solve subproblem + subprob = IterativeSubproblem(0, + lambda x: 0, + lambda x: g, + lambda x: H, + k_easy=k_easy, + k_hard=k_hard) + p, hits_boundary = subprob.solve(trust_radius) + + # Compute objective function value + J = 1/2*np.dot(p, np.dot(H, p))+np.dot(g, p) + + # Check if it respect k_trf + if hits_boundary: + assert_array_equal(np.abs(norm(p)-trust_radius) <= + (k_trf-1)*trust_radius, True) + else: + assert_equal(norm(p) <= trust_radius, True) + + # Check if it respect k_opt + assert_equal(J <= k_opt*J_ac, True) + diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_krylov.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_krylov.py new file mode 100644 index 0000000000000000000000000000000000000000..b130362323c9ba4a126019fb13974a37b35d7a28 --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_trustregion_krylov.py @@ -0,0 +1,171 @@ +""" +Unit tests for Krylov space trust-region subproblem solver. + +To run it in its simplest form:: + nosetests test_optimize.py + +""" +import numpy as np +from scipy.optimize._trlib import (get_trlib_quadratic_subproblem) +from numpy.testing import (assert_, + assert_almost_equal, + assert_equal, assert_array_almost_equal) + +KrylovQP = get_trlib_quadratic_subproblem(tol_rel_i=1e-8, tol_rel_b=1e-6) +KrylovQP_disp = get_trlib_quadratic_subproblem(tol_rel_i=1e-8, tol_rel_b=1e-6, + disp=True) + +class TestKrylovQuadraticSubproblem: + + def test_for_the_easy_case(self): + + # `H` is chosen such that `g` is not orthogonal to the + # eigenvector associated with the smallest eigenvalue. 
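# Editor's note (reference, not part of the diff): the subproblem both this
# file and test_trustregion_exact.py solve, and the optimality conditions the
# assertions below verify (standard trust-region theory; cf. Conn, Gould &
# Toint, "Trust Region Methods", Siam 2000, which test_trustregion_exact.py
# also cites):
#
#   min_p  g^T p + (1/2) p^T H p    subject to  ||p|| <= Delta
#
# p* is a global solution iff there exists lam >= 0 such that
#   (H + lam*I) p* = -g,   lam * (Delta - ||p*||) = 0,   H + lam*I is PSD.
#
# The `H.dot(p) + subprob.lam * p + g` checks below test exactly the first
# of these conditions.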
+ H = np.array([[1.0, 0.0, 4.0], + [0.0, 2.0, 0.0], + [4.0, 0.0, 3.0]]) + g = np.array([5.0, 0.0, 4.0]) + + # Trust Radius + trust_radius = 1.0 + + # Solve Subproblem + subprob = KrylovQP(x=0, + fun=lambda x: 0, + jac=lambda x: g, + hess=lambda x: None, + hessp=lambda x, y: H.dot(y)) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, np.array([-1.0, 0.0, 0.0])) + assert_equal(hits_boundary, True) + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + # check trust region constraint + assert_almost_equal(np.linalg.norm(p), trust_radius) + + trust_radius = 0.5 + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, + np.array([-0.46125446, 0., -0.19298788])) + assert_equal(hits_boundary, True) + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + # check trust region constraint + assert_almost_equal(np.linalg.norm(p), trust_radius) + + def test_for_the_hard_case(self): + + # `H` is chosen such that `g` is orthogonal to the + # eigenvector associated with the smallest eigenvalue. + H = np.array([[1.0, 0.0, 4.0], + [0.0, 2.0, 0.0], + [4.0, 0.0, 3.0]]) + g = np.array([0.0, 2.0, 0.0]) + + # Trust Radius + trust_radius = 1.0 + + # Solve Subproblem + subprob = KrylovQP(x=0, + fun=lambda x: 0, + jac=lambda x: g, + hess=lambda x: None, + hessp=lambda x, y: H.dot(y)) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, np.array([0.0, -1.0, 0.0])) + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + # check trust region constraint + assert_almost_equal(np.linalg.norm(p), trust_radius) + + trust_radius = 0.5 + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, np.array([0.0, -0.5, 0.0])) + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + # check trust region constraint + assert_almost_equal(np.linalg.norm(p), trust_radius) + + def test_for_interior_convergence(self): + + H = np.array([[1.812159, 0.82687265, 0.21838879, -0.52487006, 0.25436988], + [0.82687265, 2.66380283, 0.31508988, -0.40144163, 0.08811588], + [0.21838879, 0.31508988, 2.38020726, -0.3166346, 0.27363867], + [-0.52487006, -0.40144163, -0.3166346, 1.61927182, -0.42140166], + [0.25436988, 0.08811588, 0.27363867, -0.42140166, 1.33243101]]) + g = np.array([0.75798952, 0.01421945, 0.33847612, 0.83725004, -0.47909534]) + trust_radius = 1.1 + + # Solve Subproblem + subprob = KrylovQP(x=0, + fun=lambda x: 0, + jac=lambda x: g, + hess=lambda x: None, + hessp=lambda x, y: H.dot(y)) + p, hits_boundary = subprob.solve(trust_radius) + + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + + assert_array_almost_equal(p, [-0.68585435, 0.1222621, -0.22090999, + -0.67005053, 0.31586769]) + assert_array_almost_equal(hits_boundary, False) + + def test_for_very_close_to_zero(self): + + H = np.array([[0.88547534, 2.90692271, 0.98440885, -0.78911503, -0.28035809], + [2.90692271, -0.04618819, 0.32867263, -0.83737945, 0.17116396], + [0.98440885, 0.32867263, -0.87355957, -0.06521957, -1.43030957], + [-0.78911503, -0.83737945, -0.06521957, -1.645709, -0.33887298], + [-0.28035809, 0.17116396, -1.43030957, -0.33887298, -1.68586978]]) + g = np.array([0, 0, 0, 0, 1e-6]) + trust_radius = 1.1 + + # Solve Subproblem + subprob = KrylovQP(x=0, + fun=lambda x: 0, + jac=lambda x: g, + 
hess=lambda x: None, + hessp=lambda x, y: H.dot(y)) + p, hits_boundary = subprob.solve(trust_radius) + + # check kkt satisfaction + assert_almost_equal( + np.linalg.norm(H.dot(p) + subprob.lam * p + g), + 0.0) + # check trust region constraint + assert_almost_equal(np.linalg.norm(p), trust_radius) + + assert_array_almost_equal(p, [0.06910534, -0.01432721, + -0.65311947, -0.23815972, + -0.84954934]) + assert_array_almost_equal(hits_boundary, True) + + def test_disp(self, capsys): + H = -np.eye(5) + g = np.array([0, 0, 0, 0, 1e-6]) + trust_radius = 1.1 + + subprob = KrylovQP_disp(x=0, + fun=lambda x: 0, + jac=lambda x: g, + hess=lambda x: None, + hessp=lambda x, y: H.dot(y)) + p, hits_boundary = subprob.solve(trust_radius) + out, err = capsys.readouterr() + assert_(out.startswith(' TR Solving trust region problem'), repr(out)) + diff --git a/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_zeros.py b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_zeros.py new file mode 100644 index 0000000000000000000000000000000000000000..86606d8c4318cb26825a8fa955b2ef7647f4009c --- /dev/null +++ b/emissary-ml/llm-scripts/fine-tuning/llama3/venv/lib/python3.10/site-packages/scipy/optimize/tests/test_zeros.py @@ -0,0 +1,939 @@ +import pytest + +from functools import lru_cache + +from numpy.testing import (assert_warns, assert_, + assert_allclose, + assert_equal, + assert_array_equal, + suppress_warnings) +import numpy as np +from numpy import finfo, power, nan, isclose, sqrt, exp, sin, cos + +from scipy import optimize +from scipy.optimize import (_zeros_py as zeros, newton, root_scalar, + OptimizeResult) + +from scipy._lib._util import getfullargspec_no_self as _getfullargspec + +# Import testing parameters +from scipy.optimize._tstutils import get_tests, functions as tstutils_functions + +TOL = 4*np.finfo(float).eps # tolerance + +_FLOAT_EPS = finfo(float).eps + +bracket_methods = [zeros.bisect, zeros.ridder, zeros.brentq, zeros.brenth, + zeros.toms748] +gradient_methods = [zeros.newton] +all_methods = bracket_methods + gradient_methods + +# A few test functions used frequently: +# # A simple quadratic, (x-1)^2 - 1 +def f1(x): + return x ** 2 - 2 * x - 1 + + +def f1_1(x): + return 2 * x - 2 + + +def f1_2(x): + return 2.0 + 0 * x + + +def f1_and_p_and_pp(x): + return f1(x), f1_1(x), f1_2(x) + + +# Simple transcendental function +def f2(x): + return exp(x) - cos(x) + + +def f2_1(x): + return exp(x) + sin(x) + + +def f2_2(x): + return exp(x) + cos(x) + + +# lru cached function +@lru_cache +def f_lrucached(x): + return x + + +class TestScalarRootFinders: + # Basic tests for all scalar root finders + + xtol = 4 * np.finfo(float).eps + rtol = 4 * np.finfo(float).eps + + def _run_one_test(self, tc, method, sig_args_keys=None, + sig_kwargs_keys=None, **kwargs): + method_args = [] + for k in sig_args_keys or []: + if k not in tc: + # If a,b not present use x0, x1. 
Similarly for f and func + k = {'a': 'x0', 'b': 'x1', 'func': 'f'}.get(k, k) + method_args.append(tc[k]) + + method_kwargs = dict(**kwargs) + method_kwargs.update({'full_output': True, 'disp': False}) + for k in sig_kwargs_keys or []: + method_kwargs[k] = tc[k] + + root = tc.get('root') + func_args = tc.get('args', ()) + + try: + r, rr = method(*method_args, args=func_args, **method_kwargs) + return root, rr, tc + except Exception: + return root, zeros.RootResults(nan, -1, -1, zeros._EVALUEERR, method), tc + + def run_tests(self, tests, method, name, known_fail=None, **kwargs): + r"""Run test-cases using the specified method and the supplied signature. + + Extract the arguments for the method call from the test case + dictionary using the supplied keys for the method's signature.""" + # The methods have one of two base signatures: + # (f, a, b, **kwargs) # newton + # (func, x0, **kwargs) # bisect/brentq/... + + # FullArgSpec with args, varargs, varkw, defaults, ... + sig = _getfullargspec(method) + assert_(not sig.kwonlyargs) + nDefaults = len(sig.defaults) + nRequired = len(sig.args) - nDefaults + sig_args_keys = sig.args[:nRequired] + sig_kwargs_keys = [] + if name in ['secant', 'newton', 'halley']: + if name in ['newton', 'halley']: + sig_kwargs_keys.append('fprime') + if name in ['halley']: + sig_kwargs_keys.append('fprime2') + kwargs['tol'] = self.xtol + else: + kwargs['xtol'] = self.xtol + kwargs['rtol'] = self.rtol + + results = [list(self._run_one_test( + tc, method, sig_args_keys=sig_args_keys, + sig_kwargs_keys=sig_kwargs_keys, **kwargs)) for tc in tests] + # results= [[true root, full output, tc], ...] + + known_fail = known_fail or [] + notcvgd = [elt for elt in results if not elt[1].converged] + notcvgd = [elt for elt in notcvgd if elt[-1]['ID'] not in known_fail] + notcvged_IDS = [elt[-1]['ID'] for elt in notcvgd] + assert_equal([len(notcvged_IDS), notcvged_IDS], [0, []]) + + # The usable xtol and rtol depend on the test + tols = {'xtol': self.xtol, 'rtol': self.rtol} + tols.update(**kwargs) + rtol = tols['rtol'] + atol = tols.get('tol', tols['xtol']) + + cvgd = [elt for elt in results if elt[1].converged] + approx = [elt[1].root for elt in cvgd] + correct = [elt[0] for elt in cvgd] + # See if the root matches the reference value + notclose = [[a] + elt for a, c, elt in zip(approx, correct, cvgd) if + not isclose(a, c, rtol=rtol, atol=atol) + and elt[-1]['ID'] not in known_fail] + # If not, evaluate the function and see if is 0 at the purported root + fvs = [tc['f'](aroot, *tc.get('args', tuple())) + for aroot, c, fullout, tc in notclose] + notclose = [[fv] + elt for fv, elt in zip(fvs, notclose) if fv != 0] + assert_equal([notclose, len(notclose)], [[], 0]) + method_from_result = [result[1].method for result in results] + expected_method = [name for _ in results] + assert_equal(method_from_result, expected_method) + + def run_collection(self, collection, method, name, smoothness=None, + known_fail=None, **kwargs): + r"""Run a collection of tests using the specified method. 
+ + The name is used to determine some optional arguments.""" + tests = get_tests(collection, smoothness=smoothness) + self.run_tests(tests, method, name, known_fail=known_fail, **kwargs) + + +class TestBracketMethods(TestScalarRootFinders): + @pytest.mark.parametrize('method', bracket_methods) + @pytest.mark.parametrize('function', tstutils_functions) + def test_basic_root_scalar(self, method, function): + # Tests bracketing root finders called via `root_scalar` on a small + # set of simple problems, each of which has a root at `x=1`. Checks for + # converged status and that the root was found. + a, b = .5, sqrt(3) + + r = root_scalar(function, method=method.__name__, bracket=[a, b], x0=a, + xtol=self.xtol, rtol=self.rtol) + assert r.converged + assert_allclose(r.root, 1.0, atol=self.xtol, rtol=self.rtol) + assert r.method == method.__name__ + + @pytest.mark.parametrize('method', bracket_methods) + @pytest.mark.parametrize('function', tstutils_functions) + def test_basic_individual(self, method, function): + # Tests individual bracketing root finders on a small set of simple + # problems, each of which has a root at `x=1`. Checks for converged + # status and that the root was found. + a, b = .5, sqrt(3) + root, r = method(function, a, b, xtol=self.xtol, rtol=self.rtol, + full_output=True) + + assert r.converged + assert_allclose(root, 1.0, atol=self.xtol, rtol=self.rtol) + + @pytest.mark.parametrize('method', bracket_methods) + def test_aps_collection(self, method): + self.run_collection('aps', method, method.__name__, smoothness=1) + + @pytest.mark.parametrize('method', [zeros.bisect, zeros.ridder, + zeros.toms748]) + def test_chandrupatla_collection(self, method): + known_fail = {'fun7.4'} if method == zeros.ridder else {} + self.run_collection('chandrupatla', method, method.__name__, + known_fail=known_fail) + + @pytest.mark.parametrize('method', bracket_methods) + def test_lru_cached_individual(self, method): + # check that https://github.com/scipy/scipy/issues/10846 is fixed + # (`root_scalar` failed when passed a function that was `@lru_cache`d) + a, b = -1, 1 + root, r = method(f_lrucached, a, b, full_output=True) + assert r.converged + assert_allclose(root, 0) + + +class TestNewton(TestScalarRootFinders): + def test_newton_collections(self): + known_fail = ['aps.13.00'] + known_fail += ['aps.12.05', 'aps.12.17'] # fails under Windows Py27 + for collection in ['aps', 'complex']: + self.run_collection(collection, zeros.newton, 'newton', + smoothness=2, known_fail=known_fail) + + def test_halley_collections(self): + known_fail = ['aps.12.06', 'aps.12.07', 'aps.12.08', 'aps.12.09', + 'aps.12.10', 'aps.12.11', 'aps.12.12', 'aps.12.13', + 'aps.12.14', 'aps.12.15', 'aps.12.16', 'aps.12.17', + 'aps.12.18', 'aps.13.00'] + for collection in ['aps', 'complex']: + self.run_collection(collection, zeros.newton, 'halley', + smoothness=2, known_fail=known_fail) + + def test_newton(self): + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + x = zeros.newton(f, 3, tol=1e-6) + assert_allclose(f(x), 0, atol=1e-6) + x = zeros.newton(f, 3, x1=5, tol=1e-6) # secant, x0 and x1 + assert_allclose(f(x), 0, atol=1e-6) + x = zeros.newton(f, 3, fprime=f_1, tol=1e-6) # newton + assert_allclose(f(x), 0, atol=1e-6) + x = zeros.newton(f, 3, fprime=f_1, fprime2=f_2, tol=1e-6) # halley + assert_allclose(f(x), 0, atol=1e-6) + + def test_newton_by_name(self): + r"""Invoke newton through root_scalar()""" + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + r = root_scalar(f, method='newton', x0=3, 
fprime=f_1, xtol=1e-6) + assert_allclose(f(r.root), 0, atol=1e-6) + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + r = root_scalar(f, method='newton', x0=3, xtol=1e-6) # without f' + assert_allclose(f(r.root), 0, atol=1e-6) + + def test_secant_by_name(self): + r"""Invoke secant through root_scalar()""" + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + r = root_scalar(f, method='secant', x0=3, x1=2, xtol=1e-6) + assert_allclose(f(r.root), 0, atol=1e-6) + r = root_scalar(f, method='secant', x0=3, x1=5, xtol=1e-6) + assert_allclose(f(r.root), 0, atol=1e-6) + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + r = root_scalar(f, method='secant', x0=3, xtol=1e-6) # without x1 + assert_allclose(f(r.root), 0, atol=1e-6) + + def test_halley_by_name(self): + r"""Invoke halley through root_scalar()""" + for f, f_1, f_2 in [(f1, f1_1, f1_2), (f2, f2_1, f2_2)]: + r = root_scalar(f, method='halley', x0=3, + fprime=f_1, fprime2=f_2, xtol=1e-6) + assert_allclose(f(r.root), 0, atol=1e-6) + + def test_root_scalar_fail(self): + message = 'fprime2 must be specified for halley' + with pytest.raises(ValueError, match=message): + root_scalar(f1, method='halley', fprime=f1_1, x0=3, xtol=1e-6) # no fprime2 + message = 'fprime must be specified for halley' + with pytest.raises(ValueError, match=message): + root_scalar(f1, method='halley', fprime2=f1_2, x0=3, xtol=1e-6) # no fprime + + def test_array_newton(self): + """test newton with array""" + + def f1(x, *a): + b = a[0] + x * a[3] + return a[1] - a[2] * (np.exp(b / a[5]) - 1.0) - b / a[4] - x + + def f1_1(x, *a): + b = a[3] / a[5] + return -a[2] * np.exp(a[0] / a[5] + x * b) * b - a[3] / a[4] - 1 + + def f1_2(x, *a): + b = a[3] / a[5] + return -a[2] * np.exp(a[0] / a[5] + x * b) * b**2 + + a0 = np.array([ + 5.32725221, 5.48673747, 5.49539973, + 5.36387202, 4.80237316, 1.43764452, + 5.23063958, 5.46094772, 5.50512718, + 5.42046290 + ]) + a1 = (np.sin(range(10)) + 1.0) * 7.0 + args = (a0, a1, 1e-09, 0.004, 10, 0.27456) + x0 = [7.0] * 10 + x = zeros.newton(f1, x0, f1_1, args) + x_expected = ( + 6.17264965, 11.7702805, 12.2219954, + 7.11017681, 1.18151293, 0.143707955, + 4.31928228, 10.5419107, 12.7552490, + 8.91225749 + ) + assert_allclose(x, x_expected) + # test halley's + x = zeros.newton(f1, x0, f1_1, args, fprime2=f1_2) + assert_allclose(x, x_expected) + # test secant + x = zeros.newton(f1, x0, args=args) + assert_allclose(x, x_expected) + + def test_array_newton_complex(self): + def f(x): + return x + 1+1j + + def fprime(x): + return 1.0 + + t = np.full(4, 1j) + x = zeros.newton(f, t, fprime=fprime) + assert_allclose(f(x), 0.) + + # should work even if x0 is not complex + t = np.ones(4) + x = zeros.newton(f, t, fprime=fprime) + assert_allclose(f(x), 0.) + + x = zeros.newton(f, t) + assert_allclose(f(x), 0.) 
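# Editor's note (sketch, not part of the diff): the array-broadcasting
# behaviour the test_array_* cases above rely on — newton accepts an array
# x0 (falling back to the secant method when no fprime is given) and returns
# an array of roots, with extra args broadcast elementwise.
import numpy as np
from scipy.optimize import newton

roots = newton(lambda x, c: x**2 - c, x0=np.array([1.0, 3.0, 4.0]),
               args=(np.array([2.0, 9.0, 16.0]),))
# roots is approximately [sqrt(2), 3, 4]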
+ + def test_array_secant_active_zero_der(self): + """test secant doesn't continue to iterate zero derivatives""" + x = zeros.newton(lambda x, *a: x*x - a[0], x0=[4.123, 5], + args=[np.array([17, 25])]) + assert_allclose(x, (4.123105625617661, 5.0)) + + def test_array_newton_integers(self): + # test secant with float + x = zeros.newton(lambda y, z: z - y ** 2, [4.0] * 2, + args=([15.0, 17.0],)) + assert_allclose(x, (3.872983346207417, 4.123105625617661)) + # test integer becomes float + x = zeros.newton(lambda y, z: z - y ** 2, [4] * 2, args=([15, 17],)) + assert_allclose(x, (3.872983346207417, 4.123105625617661)) + + def test_array_newton_zero_der_failures(self): + # test derivative zero warning + assert_warns(RuntimeWarning, zeros.newton, + lambda y: y**2 - 2, [0., 0.], lambda y: 2 * y) + # test failures and zero_der + with pytest.warns(RuntimeWarning): + results = zeros.newton(lambda y: y**2 - 2, [0., 0.], + lambda y: 2*y, full_output=True) + assert_allclose(results.root, 0) + assert results.zero_der.all() + assert not results.converged.any() + + def test_newton_combined(self): + def f1(x): + return x ** 2 - 2 * x - 1 + def f1_1(x): + return 2 * x - 2 + def f1_2(x): + return 2.0 + 0 * x + + def f1_and_p_and_pp(x): + return x**2 - 2*x-1, 2*x-2, 2.0 + + sol0 = root_scalar(f1, method='newton', x0=3, fprime=f1_1) + sol = root_scalar(f1_and_p_and_pp, method='newton', x0=3, fprime=True) + assert_allclose(sol0.root, sol.root, atol=1e-8) + assert_equal(2*sol.function_calls, sol0.function_calls) + + sol0 = root_scalar(f1, method='halley', x0=3, fprime=f1_1, fprime2=f1_2) + sol = root_scalar(f1_and_p_and_pp, method='halley', x0=3, fprime2=True) + assert_allclose(sol0.root, sol.root, atol=1e-8) + assert_equal(3*sol.function_calls, sol0.function_calls) + + def test_newton_full_output(self, capsys): + # Test the full_output capability, both when converging and not. + # Use simple polynomials, to avoid hitting platform dependencies + # (e.g., exp & trig) in number of iterations + + x0 = 3 + expected_counts = [(6, 7), (5, 10), (3, 9)] + + for derivs in range(3): + kwargs = {'tol': 1e-6, 'full_output': True, } + for k, v in [['fprime', f1_1], ['fprime2', f1_2]][:derivs]: + kwargs[k] = v + + x, r = zeros.newton(f1, x0, disp=False, **kwargs) + assert_(r.converged) + assert_equal(x, r.root) + assert_equal((r.iterations, r.function_calls), expected_counts[derivs]) + if derivs == 0: + assert r.function_calls <= r.iterations + 1 + else: + assert_equal(r.function_calls, (derivs + 1) * r.iterations) + + # Now repeat, allowing one fewer iteration to force convergence failure + iters = r.iterations - 1 + x, r = zeros.newton(f1, x0, maxiter=iters, disp=False, **kwargs) + assert_(not r.converged) + assert_equal(x, r.root) + assert_equal(r.iterations, iters) + + if derivs == 1: + # Check that the correct Exception is raised and + # validate the start of the message. + msg = 'Failed to converge after %d iterations, value is .*' % (iters) + with pytest.raises(RuntimeError, match=msg): + x, r = zeros.newton(f1, x0, maxiter=iters, disp=True, **kwargs) + + def test_deriv_zero_warning(self): + def func(x): + return x ** 2 - 2.0 + def dfunc(x): + return 2 * x + assert_warns(RuntimeWarning, zeros.newton, func, 0.0, dfunc, disp=False) + with pytest.raises(RuntimeError, match='Derivative was zero'): + zeros.newton(func, 0.0, dfunc) + + def test_newton_does_not_modify_x0(self): + # https://github.com/scipy/scipy/issues/9964 + x0 = np.array([0.1, 3]) + x0_copy = x0.copy() # Copy to test for equality. 
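# Editor's note (sketch, not part of the diff): the combined-callable API
# test_newton_combined checks above — with fprime=True the function must
# return (f, f'), and root_scalar reuses the single evaluation for both,
# halving the function-call count relative to separate callables.
from scipy.optimize import root_scalar

def f_and_fprime(x):
    return x**2 - 2*x - 1, 2*x - 2

sol = root_scalar(f_and_fprime, method='newton', x0=3, fprime=True)
# sol.root is approximately 1 + sqrt(2)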
+ newton(np.sin, x0, np.cos) + assert_array_equal(x0, x0_copy) + + def test_gh17570_defaults(self): + # Previously, when fprime was not specified, root_scalar would default + # to secant. When x1 was not specified, secant failed. + # Check that without fprime, the default is secant if x1 is specified + # and newton otherwise. + res_newton_default = root_scalar(f1, method='newton', x0=3, xtol=1e-6) + res_secant_default = root_scalar(f1, method='secant', x0=3, x1=2, + xtol=1e-6) + # `newton` uses the secant method when `x1` and `x2` are specified + res_secant = newton(f1, x0=3, x1=2, tol=1e-6, full_output=True)[1] + + # all three found a root + assert_allclose(f1(res_newton_default.root), 0, atol=1e-6) + assert res_newton_default.root.shape == tuple() + assert_allclose(f1(res_secant_default.root), 0, atol=1e-6) + assert res_secant_default.root.shape == tuple() + assert_allclose(f1(res_secant.root), 0, atol=1e-6) + assert res_secant.root.shape == tuple() + + # Defaults are correct + assert (res_secant_default.root + == res_secant.root + != res_newton_default.iterations) + assert (res_secant_default.iterations + == res_secant_default.function_calls - 1 # true for secant + == res_secant.iterations + != res_newton_default.iterations + == res_newton_default.function_calls/2) # newton 2-point diff + + @pytest.mark.parametrize('kwargs', [dict(), {'method': 'newton'}]) + def test_args_gh19090(self, kwargs): + def f(x, a, b): + assert a == 3 + assert b == 1 + return (x ** a - b) + + res = optimize.root_scalar(f, x0=3, args=(3, 1), **kwargs) + assert res.converged + assert_allclose(res.root, 1) + + @pytest.mark.parametrize('method', ['secant', 'newton']) + def test_int_x0_gh19280(self, method): + # Originally, `newton` ensured that only floats were passed to the + # callable. This was indadvertently changed by gh-17669. Check that + # it has been changed back. + def f(x): + # an integer raised to a negative integer power would fail + return x**-2 - 2 + + res = optimize.root_scalar(f, x0=1, method=method) + assert res.converged + assert_allclose(abs(res.root), 2**-0.5) + assert res.root.dtype == np.dtype(np.float64) + + +def test_gh_5555(): + root = 0.1 + + def f(x): + return x - root + + methods = [zeros.bisect, zeros.ridder] + xtol = rtol = TOL + for method in methods: + res = method(f, -1e8, 1e7, xtol=xtol, rtol=rtol) + assert_allclose(root, res, atol=xtol, rtol=rtol, + err_msg='method %s' % method.__name__) + + +def test_gh_5557(): + # Show that without the changes in 5557 brentq and brenth might + # only achieve a tolerance of 2*(xtol + rtol*|res|). + + # f linearly interpolates (0, -0.1), (0.5, -0.1), and (1, + # 0.4). The important parts are that |f(0)| < |f(1)| (so that + # brent takes 0 as the initial guess), |f(0)| < atol (so that + # brent accepts 0 as the root), and that the exact root of f lies + # more than atol away from 0 (so that brent doesn't achieve the + # desired tolerance). + def f(x): + if x < 0.5: + return -0.1 + else: + return x - 0.6 + + atol = 0.51 + rtol = 4 * _FLOAT_EPS + methods = [zeros.brentq, zeros.brenth] + for method in methods: + res = method(f, 0, 1, xtol=atol, rtol=rtol) + assert_allclose(0.6, res, atol=atol, rtol=rtol) + + +def test_brent_underflow_in_root_bracketing(): + # Testing if an interval [a,b] brackets a zero of a function + # by checking f(a)*f(b) < 0 is not reliable when the product + # underflows/overflows. 
(reported in issue# 13737) + + underflow_scenario = (-450.0, -350.0, -400.0) + overflow_scenario = (350.0, 450.0, 400.0) + + for a, b, root in [underflow_scenario, overflow_scenario]: + c = np.exp(root) + for method in [zeros.brenth, zeros.brentq]: + res = method(lambda x: np.exp(x)-c, a, b) + assert_allclose(root, res) + + +class TestRootResults: + r = zeros.RootResults(root=1.0, iterations=44, function_calls=46, flag=0, + method="newton") + + def test_repr(self): + expected_repr = (" converged: True\n flag: converged" + "\n function_calls: 46\n iterations: 44\n" + " root: 1.0\n method: newton") + assert_equal(repr(self.r), expected_repr) + + def test_type(self): + assert isinstance(self.r, OptimizeResult) + + +def test_complex_halley(): + """Test Halley's works with complex roots""" + def f(x, *a): + return a[0] * x**2 + a[1] * x + a[2] + + def f_1(x, *a): + return 2 * a[0] * x + a[1] + + def f_2(x, *a): + retval = 2 * a[0] + try: + size = len(x) + except TypeError: + return retval + else: + return [retval] * size + + z = complex(1.0, 2.0) + coeffs = (2.0, 3.0, 4.0) + y = zeros.newton(f, z, args=coeffs, fprime=f_1, fprime2=f_2, tol=1e-6) + # (-0.75000000000000078+1.1989578808281789j) + assert_allclose(f(y, *coeffs), 0, atol=1e-6) + z = [z] * 10 + coeffs = (2.0, 3.0, 4.0) + y = zeros.newton(f, z, args=coeffs, fprime=f_1, fprime2=f_2, tol=1e-6) + assert_allclose(f(y, *coeffs), 0, atol=1e-6) + + +def test_zero_der_nz_dp(capsys): + """Test secant method with a non-zero dp, but an infinite newton step""" + # pick a symmetrical functions and choose a point on the side that with dx + # makes a secant that is a flat line with zero slope, EG: f = (x - 100)**2, + # which has a root at x = 100 and is symmetrical around the line x = 100 + # we have to pick a really big number so that it is consistently true + # now find a point on each side so that the secant has a zero slope + dx = np.finfo(float).eps ** 0.33 + # 100 - p0 = p1 - 100 = p0 * (1 + dx) + dx - 100 + # -> 200 = p0 * (2 + dx) + dx + p0 = (200.0 - dx) / (2.0 + dx) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "RMS of") + x = zeros.newton(lambda y: (y - 100.0)**2, x0=[p0] * 10) + assert_allclose(x, [100] * 10) + # test scalar cases too + p0 = (2.0 - 1e-4) / (2.0 + 1e-4) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "Tolerance of") + x = zeros.newton(lambda y: (y - 1.0) ** 2, x0=p0, disp=False) + assert_allclose(x, 1) + with pytest.raises(RuntimeError, match='Tolerance of'): + x = zeros.newton(lambda y: (y - 1.0) ** 2, x0=p0, disp=True) + p0 = (-2.0 + 1e-4) / (2.0 + 1e-4) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "Tolerance of") + x = zeros.newton(lambda y: (y + 1.0) ** 2, x0=p0, disp=False) + assert_allclose(x, -1) + with pytest.raises(RuntimeError, match='Tolerance of'): + x = zeros.newton(lambda y: (y + 1.0) ** 2, x0=p0, disp=True) + + +def test_array_newton_failures(): + """Test that array newton fails as expected""" + # p = 0.68 # [MPa] + # dp = -0.068 * 1e6 # [Pa] + # T = 323 # [K] + diameter = 0.10 # [m] + # L = 100 # [m] + roughness = 0.00015 # [m] + rho = 988.1 # [kg/m**3] + mu = 5.4790e-04 # [Pa*s] + u = 2.488 # [m/s] + reynolds_number = rho * u * diameter / mu # Reynolds number + + def colebrook_eqn(darcy_friction, re, dia): + return (1 / np.sqrt(darcy_friction) + + 2 * np.log10(roughness / 3.7 / dia + + 2.51 / re / np.sqrt(darcy_friction))) + + # only some failures + with pytest.warns(RuntimeWarning): + result = zeros.newton( + colebrook_eqn, x0=[0.01, 0.2, 0.02223, 0.3], 
maxiter=2, + args=[reynolds_number, diameter], full_output=True + ) + assert not result.converged.all() + # they all fail + with pytest.raises(RuntimeError): + result = zeros.newton( + colebrook_eqn, x0=[0.01] * 2, maxiter=2, + args=[reynolds_number, diameter], full_output=True + ) + + +# this test should **not** raise a RuntimeWarning +def test_gh8904_zeroder_at_root_fails(): + """Test that Newton or Halley don't warn if the derivative is zero at the root""" + + # a function that has a zero derivative at its root + def f_zeroder_root(x): + return x**3 - x**2 + + # should work with secant + r = zeros.newton(f_zeroder_root, x0=0) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + # test again with array + r = zeros.newton(f_zeroder_root, x0=[0]*10) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + + # 1st derivative + def fder(x): + return 3 * x**2 - 2 * x + + # 2nd derivative + def fder2(x): + return 6*x - 2 + + # should work with newton and halley + r = zeros.newton(f_zeroder_root, x0=0, fprime=fder) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + r = zeros.newton(f_zeroder_root, x0=0, fprime=fder, + fprime2=fder2) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + # test again with array + r = zeros.newton(f_zeroder_root, x0=[0]*10, fprime=fder) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + r = zeros.newton(f_zeroder_root, x0=[0]*10, fprime=fder, + fprime2=fder2) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + + # also test that if a root is found we do not raise a RuntimeWarning even + # if the derivative is zero there, e.g. at x = 0.5, fval = -0.125 and + # fder = -0.25, so the next guess is 0.5 - (-0.125/-0.25) = 0, which is the + # root; if the solver continued from that guess it would calculate a zero + # derivative, so it should return the root w/o a RuntimeWarning + r = zeros.newton(f_zeroder_root, x0=0.5, fprime=fder) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + # test again with array + r = zeros.newton(f_zeroder_root, x0=[0.5]*10, fprime=fder) + assert_allclose(r, 0, atol=zeros._xtol, rtol=zeros._rtol) + # doesn't apply to halley + + +def test_gh_8881(): + r"""Test that Halley's method realizes that the 2nd order adjustment + is too big and falls back to the 1st order adjustment.""" + n = 9 + + def f(x): + return power(x, 1.0/n) - power(n, 1.0/n) + + def fp(x): + return power(x, (1.0-n)/n)/n + + def fpp(x): + return power(x, (1.0-2*n)/n) * (1.0/n) * (1.0-n)/n + + x0 = 0.1 + # The root is at x = 9. + # The function has positive slope, and x0 < root. + # Newton's method succeeds in 8 iterations. + rt, r = newton(f, x0, fprime=fp, full_output=True) + assert r.converged + # Before gh-8881 was fixed by PR 8882, Halley's method would send x in the + # wrong direction. Check that it now succeeds.
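+ # (For reference, a minimal sketch of why this can happen: Halley's update + # is x_new = x - 2*f*fp / (2*fp**2 - f*fpp), i.e. the Newton step -f/fp + # scaled by 1/(1 - f*fpp/(2*fp**2)); once |f*fpp| approaches or exceeds + # 2*fp**2, that scaling factor blows up or flips sign, so the 2nd order + # adjustment has to be abandoned in favor of the plain Newton step.)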
+ rt, r = newton(f, x0, fprime=fp, fprime2=fpp, full_output=True) + assert r.converged + + +def test_gh_9608_preserve_array_shape(): + """ + Test that shape is preserved for array inputs even if fprime or fprime2 is + scalar + """ + def f(x): + return x**2 + + def fp(x): + return 2 * x + + def fpp(x): + return 2 + + x0 = np.array([-2], dtype=np.float32) + rt, r = newton(f, x0, fprime=fp, fprime2=fpp, full_output=True) + assert r.converged + + x0_array = np.array([-2, -3], dtype=np.float32) + # This next invocation should fail because fpp returns a scalar for an + # array input + with pytest.raises(IndexError): + result = zeros.newton( + f, x0_array, fprime=fp, fprime2=fpp, full_output=True + ) + + def fpp_array(x): + return np.full(np.shape(x), 2, dtype=np.float32) + + result = zeros.newton( + f, x0_array, fprime=fp, fprime2=fpp_array, full_output=True + ) + assert result.converged.all() + + +@pytest.mark.parametrize( + "maximum_iterations,flag_expected", + [(10, zeros.CONVERR), (100, zeros.CONVERGED)]) +def test_gh9254_flag_if_maxiter_exceeded(maximum_iterations, flag_expected): + """ + Test that the flag indicates a convergence error when the maximum number + of iterations is exceeded. + """ + result = zeros.brentq( + lambda x: ((1.2*x - 2.3)*x + 3.4)*x - 4.5, + -30, 30, (), 1e-6, 1e-6, maximum_iterations, + full_output=True, disp=False) + assert result[1].flag == flag_expected + if flag_expected == zeros.CONVERR: + # didn't converge because the maximum number of iterations was exceeded + assert result[1].iterations == maximum_iterations + elif flag_expected == zeros.CONVERGED: + # converged before reaching the maximum number of iterations + assert result[1].iterations < maximum_iterations + + +def test_gh9551_raise_error_if_disp_true(): + """Test that a zero derivative raises a RuntimeError when disp is true""" + + def f(x): + return x*x + 1 + + def f_p(x): + return 2*x + + assert_warns(RuntimeWarning, zeros.newton, f, 1.0, f_p, disp=False) + with pytest.raises( + RuntimeError, + match=r'^Derivative was zero\. Failed to converge after \d+ iterations, ' + r'value is [+-]?\d*\.\d+\.$'): + zeros.newton(f, 1.0, f_p) + root = zeros.newton(f, complex(10.0, 10.0), f_p) + assert_allclose(root, complex(0.0, 1.0)) + + +@pytest.mark.parametrize('solver_name', + ['brentq', 'brenth', 'bisect', 'ridder', 'toms748']) +def test_gh3089_8394(solver_name): + # gh-3089 and gh-8394 reported that bracketing solvers returned incorrect + # results when they encountered NaNs. Check that this is resolved. + def f(x): + return np.nan + + solver = getattr(zeros, solver_name) + with pytest.raises(ValueError, match="The function value at x..."): + solver(f, 0, 1) + + +@pytest.mark.parametrize('method', + ['brentq', 'brenth', 'bisect', 'ridder', 'toms748']) +def test_gh18171(method): + # gh-3089 and gh-8394 reported that bracketing solvers returned incorrect + # results when they encountered NaNs. Check that `root_scalar` returns + # normally but indicates that convergence was unsuccessful. See gh-18171. + def f(x): + f._count += 1 + return np.nan + f._count = 0 + + res = root_scalar(f, bracket=(0, 1), method=method) + assert res.converged is False + assert res.flag.startswith("The function value at x") + assert res.function_calls == f._count + assert str(res.root) in res.flag + + +@pytest.mark.parametrize('solver_name', + ['brentq', 'brenth', 'bisect', 'ridder', 'toms748']) +@pytest.mark.parametrize('rs_interface', [True, False]) +def test_function_calls(solver_name, rs_interface): + # There do not appear to be other checks that the bracketing solvers report + # the correct number of function evaluations, so check that here.
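+ # (Note: with full_output=True the functional solvers return a + # (root, RootResults) tuple, while root_scalar returns a result object + # directly; the lambda below adapts the call signature, and the asserts + # below account for the differing return types.)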
solver = ((lambda f, a, b, **kwargs: root_scalar(f, bracket=(a, b))) + if rs_interface else getattr(zeros, solver_name)) + + def f(x): + f.calls += 1 + return x**2 - 1 + f.calls = 0 + + res = solver(f, 0, 10, full_output=True) + + if rs_interface: + assert res.function_calls == f.calls + else: + assert res[1].function_calls == f.calls + + +def test_gh_14486_converged_false(): + """Test that zero slope with the secant method results in converged=False""" + def lhs(x): + return x * np.exp(-x*x) - 0.07 + + with pytest.warns(RuntimeWarning, match='Tolerance of'): + res = root_scalar(lhs, method='secant', x0=-0.15, x1=1.0) + assert not res.converged + assert res.flag == 'convergence error' + + with pytest.warns(RuntimeWarning, match='Tolerance of'): + res = newton(lhs, x0=-0.15, x1=1.0, disp=False, full_output=True)[1] + assert not res.converged + assert res.flag == 'convergence error' + + +@pytest.mark.parametrize('solver_name', + ['brentq', 'brenth', 'bisect', 'ridder', 'toms748']) +@pytest.mark.parametrize('rs_interface', [True, False]) +def test_gh5584(solver_name, rs_interface): + # gh-5584 reported that an underflow can cause sign checks in the algorithm + # to fail. Check that this is resolved. + solver = ((lambda f, a, b, **kwargs: root_scalar(f, bracket=(a, b))) + if rs_interface else getattr(zeros, solver_name)) + + def f(x): + return 1e-200*x + + # Report failure when signs are the same + with pytest.raises(ValueError, match='...must have different signs'): + solver(f, -0.5, -0.4, full_output=True) + + # Solve successfully when signs are different + res = solver(f, -0.5, 0.4, full_output=True) + res = res if rs_interface else res[1] + assert res.converged + assert_allclose(res.root, 0, atol=1e-8) + + # Solve successfully when one side is negative zero + res = solver(f, -0.5, float('-0.0'), full_output=True) + res = res if rs_interface else res[1] + assert res.converged + assert_allclose(res.root, 0, atol=1e-8) + + +def test_gh13407(): + # gh-13407 reported that the message produced by `scipy.optimize.toms748` + # when `rtol < eps` is incorrect, and also that toms748 is unusual in + # accepting `rtol` as low as eps while other solvers raise at 4*eps. Check + # that the error message has been corrected and that `rtol=eps` can produce + # a lower function value than `rtol=4*eps`. + def f(x): + return x**3 - 2*x - 5 + + xtol = 1e-300 + eps = np.finfo(float).eps + x1 = zeros.toms748(f, 1e-10, 1e10, xtol=xtol, rtol=1*eps) + f1 = f(x1) + x4 = zeros.toms748(f, 1e-10, 1e10, xtol=xtol, rtol=4*eps) + f4 = f(x4) + assert f1 < f4 + + # using old-style syntax to get exactly the same message + message = fr"rtol too small \({eps/2:g} < {eps:g}\)" + with pytest.raises(ValueError, match=message): + zeros.toms748(f, 1e-10, 1e10, xtol=xtol, rtol=eps/2) + + +def test_newton_complex_gh10103(): + # gh-10103 reported a problem when `newton` is passed a Python complex x0, + # no `fprime` (secant method), and no `x1` (so `x1` must be constructed). + # Check that this is resolved. + def f(z): + return z - 1 + res = newton(f, 1+1j) + assert_allclose(res, 1, atol=1e-12) + + res = root_scalar(f, x0=1+1j, x1=2+1.5j, method='secant') + assert_allclose(res.root, 1, atol=1e-12) + + +@pytest.mark.parametrize('method', all_methods) +def test_maxiter_int_check_gh10236(method): + # gh-10236 reported that the error message when `maxiter` is not an integer + # was difficult to interpret. Check that this was resolved (by gh-10907).
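+ # (Note: every parametrized method is called here with the common + # bracketing-style signature method(f, a, b, ..., maxiter=...); the + # expected message below is CPython's standard text when a float fails + # an implicit conversion to an integer.)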
+ message = "'float' object cannot be interpreted as an integer" + with pytest.raises(TypeError, match=message): + method(f1, 0.0, 1.0, maxiter=72.45)
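+ + +# A minimal sketch (not part of the tests above) of the behaviour this last +# check pins down, assuming `brentq` is among `all_methods`: passing a +# non-integer maxiter, e.g. +# +# from scipy.optimize import brentq +# brentq(lambda x: x**3 - 2*x - 5, 2, 3, maxiter=10.5) +# +# raises "TypeError: 'float' object cannot be interpreted as an integer".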