euler314 committed on
Commit
2244d56
·
verified ·
1 Parent(s): fefe0f9

Update cubic_cpp.cpp

Browse files
Files changed (1) hide show
  1. cubic_cpp.cpp +349 -163
cubic_cpp.cpp CHANGED
@@ -1,9 +1,8 @@
1
  #include <pybind11/pybind11.h>
2
  #include <pybind11/numpy.h>
3
  #include <pybind11/stl.h>
4
- #include <pybind11/eigen.h>
5
- #include <Eigen/Dense>
6
  #include <vector>
 
7
  #include <cmath>
8
  #include <algorithm>
9
  #include <random>
@@ -15,7 +14,7 @@ double apply_y_condition(double y) {
15
  return y > 1.0 ? y : 1.0 / y;
16
  }
17
 
18
- // Discriminant calculation
19
  double discriminant_func(double z, double beta, double z_a, double y) {
20
  double y_effective = apply_y_condition(y);
21
 
@@ -25,16 +24,39 @@ double discriminant_func(double z, double beta, double z_a, double y) {
25
  double c = z + z_a + 1.0 - y_effective * (beta * z_a + 1.0 - beta);
26
  double d = 1.0;
27
 
28
- // Simple formula for cubic discriminant
29
- return std::pow((b*c)/(6.0*a*a) - std::pow(b, 3)/(27.0*std::pow(a, 3)) - d/(2.0*a), 2) +
30
- std::pow(c/(3.0*a) - std::pow(b, 2)/(9.0*std::pow(a, 2)), 3);
 
 
 
 
 
 
 
 
 
 
31
  }
32
 
33
- // Function to compute the theoretical max value
34
  double compute_theoretical_max(double a, double y, double beta) {
35
- auto f = [a, y, beta](double k) -> double {
36
- return (y * beta * (a - 1) * k + (a * k + 1) * ((y - 1) * k - 1)) /
37
- ((a * k + 1) * (k * k + k) * y);
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  };
39
 
40
  // Use numerical optimization to find the maximum
@@ -42,10 +64,10 @@ double compute_theoretical_max(double a, double y, double beta) {
42
  double best_k = 1.0;
43
  double best_val = f(best_k);
44
 
45
- // Initial grid search over a wide range
46
- const int num_grid_points = 200;
47
  for (int i = 0; i < num_grid_points; ++i) {
48
- double k = 0.01 + 100.0 * i / (num_grid_points - 1); // From 0.01 to 100
49
  double val = f(k);
50
  if (val > best_val) {
51
  best_val = val;
@@ -56,45 +78,68 @@ double compute_theoretical_max(double a, double y, double beta) {
56
  // Refine with golden section search
57
  double a_gs = std::max(0.01, best_k / 10.0);
58
  double b_gs = best_k * 10.0;
59
- const double golden_ratio = (1.0 + std::sqrt(5.0)) / 2.0;
60
- const double tolerance = 1e-10;
61
 
62
  double c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
63
  double d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
 
 
64
 
65
- while (std::abs(b_gs - a_gs) > tolerance) {
66
- if (f(c_gs) > f(d_gs)) {
 
67
  b_gs = d_gs;
68
  d_gs = c_gs;
69
  c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
 
 
70
  } else {
71
  a_gs = c_gs;
72
  c_gs = d_gs;
73
  d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
 
 
74
  }
75
  }
76
 
77
  return f((a_gs + b_gs) / 2.0);
78
  }
79
 
80
- // Function to compute the theoretical min value
81
  double compute_theoretical_min(double a, double y, double beta) {
82
- auto f = [a, y, beta](double t) -> double {
83
- return (y * beta * (a - 1) * t + (a * t + 1) * ((y - 1) * t - 1)) /
84
- ((a * t + 1) * (t * t + t) * y);
 
 
 
 
 
 
 
 
 
 
 
 
85
  };
86
 
87
- // Use numerical optimization to find the minimum
88
- // Grid search followed by golden section search
89
- double best_t = -0.5 / a; // Midpoint of (-1/a, 0)
 
 
90
  double best_val = f(best_t);
91
 
92
  // Initial grid search over the range (-1/a, 0)
93
- const int num_grid_points = 200;
 
 
 
94
  for (int i = 1; i < num_grid_points; ++i) {
95
- // From slightly above -1/a to slightly below 0
96
- double t = -0.999/a + 0.998/a * i / (num_grid_points - 1);
97
- if (t >= 0 || t <= -1.0/a) continue; // Ensure t is in range (-1/a, 0)
98
 
99
  double val = f(t);
100
  if (val < best_val) {
@@ -104,32 +149,155 @@ double compute_theoretical_min(double a, double y, double beta) {
104
  }
105
 
106
  // Refine with golden section search
107
- double a_gs = -0.999/a; // Slightly above -1/a
108
- double b_gs = -0.001/a; // Slightly below 0
109
- const double golden_ratio = (1.0 + std::sqrt(5.0)) / 2.0;
110
- const double tolerance = 1e-10;
111
 
112
  double c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
113
  double d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
 
 
114
 
115
- while (std::abs(b_gs - a_gs) > tolerance) {
116
- if (f(c_gs) < f(d_gs)) {
 
117
  b_gs = d_gs;
118
  d_gs = c_gs;
119
  c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
 
 
120
  } else {
121
  a_gs = c_gs;
122
  c_gs = d_gs;
123
  d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
 
 
124
  }
125
  }
126
 
127
  return f((a_gs + b_gs) / 2.0);
128
  }
129
 
130
- // Compute eigenvalues for a given beta value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  std::tuple<double, double> compute_eigenvalues_for_beta(double z_a, double y, double beta, int n, int seed) {
132
- // Apply the condition for y
133
  double y_effective = apply_y_condition(y);
134
 
135
  // Set random seed
@@ -139,16 +307,27 @@ std::tuple<double, double> compute_eigenvalues_for_beta(double z_a, double y, do
139
  // Compute dimension p based on aspect ratio y
140
  int p = static_cast<int>(y_effective * n);
141
 
142
- // Generate random matrix X
143
- Eigen::MatrixXd X(p, n);
144
  for (int i = 0; i < p; i++) {
145
  for (int j = 0; j < n; j++) {
146
- X(i, j) = norm(gen);
147
  }
148
  }
149
 
150
- // Compute sample covariance matrix S_n = (1/n) * X * X^T
151
- Eigen::MatrixXd S_n = (X * X.transpose()) / static_cast<double>(n);
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  // Build T_n diagonal matrix
154
  int k = static_cast<int>(std::floor(beta * p));
@@ -159,31 +338,33 @@ std::tuple<double, double> compute_eigenvalues_for_beta(double z_a, double y, do
159
  // Shuffle diagonal entries
160
  std::shuffle(diags.begin(), diags.end(), gen);
161
 
162
- // Create T_n and its square root
163
- Eigen::MatrixXd T_n = Eigen::MatrixXd::Zero(p, p);
164
- Eigen::MatrixXd T_sqrt = Eigen::MatrixXd::Zero(p, p);
165
-
166
  for (int i = 0; i < p; i++) {
167
- double v = diags[i];
168
- T_n(i, i) = v;
169
- T_sqrt(i, i) = std::sqrt(v);
170
  }
171
 
172
- // Form B = T_sqrt * S_n * T_sqrt (symmetric)
173
- Eigen::MatrixXd B = T_sqrt * S_n * T_sqrt;
 
 
 
 
 
 
174
 
175
- // Compute eigenvalues of B
176
- Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(B);
177
- Eigen::VectorXd eigenvalues = solver.eigenvalues();
178
 
179
- // Return min and max eigenvalues
180
- double min_eigenvalue = eigenvalues(0);
181
- double max_eigenvalue = eigenvalues(p-1);
182
 
183
- return std::make_tuple(min_eigenvalue, max_eigenvalue);
 
184
  }
185
 
186
- // Compute eigenvalue support boundaries
187
  std::tuple<std::vector<double>, std::vector<double>, std::vector<double>, std::vector<double>>
188
  compute_eigenvalue_support_boundaries(double z_a, double y, const std::vector<double>& beta_values,
189
  int n_samples, int seeds) {
@@ -193,18 +374,25 @@ compute_eigenvalue_support_boundaries(double z_a, double y, const std::vector<do
193
  std::vector<double> theoretical_min_values(num_betas, 0.0);
194
  std::vector<double> theoretical_max_values(num_betas, 0.0);
195
 
 
 
196
  for (size_t i = 0; i < num_betas; i++) {
197
  double beta = beta_values[i];
198
-
199
- // Calculate theoretical values
200
  theoretical_max_values[i] = compute_theoretical_max(z_a, y, beta);
201
  theoretical_min_values[i] = compute_theoretical_min(z_a, y, beta);
 
 
 
 
 
202
 
203
  std::vector<double> min_vals;
204
  std::vector<double> max_vals;
205
 
206
- // Run multiple trials with different seeds
207
- for (int seed = 0; seed < seeds; seed++) {
 
 
208
  auto [min_eig, max_eig] = compute_eigenvalues_for_beta(z_a, y, beta, n_samples, seed);
209
  min_vals.push_back(min_eig);
210
  max_vals.push_back(max_eig);
@@ -215,72 +403,85 @@ compute_eigenvalue_support_boundaries(double z_a, double y, const std::vector<do
215
  for (double val : min_vals) min_sum += val;
216
  for (double val : max_vals) max_sum += val;
217
 
218
- min_eigenvalues[i] = min_vals.empty() ? 0.0 : min_sum / min_vals.size();
219
- max_eigenvalues[i] = max_vals.empty() ? 0.0 : max_sum / max_vals.size();
220
  }
221
 
222
  return std::make_tuple(min_eigenvalues, max_eigenvalues, theoretical_min_values, theoretical_max_values);
223
  }
224
 
225
- // Find zeros of discriminant
226
  std::vector<double> find_z_at_discriminant_zero(double z_a, double y, double beta,
227
  double z_min, double z_max, int steps) {
228
  std::vector<double> roots_found;
229
  double y_effective = apply_y_condition(y);
230
 
231
- // Create z grid
232
- std::vector<double> z_grid(steps);
233
- double step_size = (z_max - z_min) / (steps - 1);
234
- for (int i = 0; i < steps; i++) {
235
- z_grid[i] = z_min + i * step_size;
236
- }
237
 
238
- // Evaluate discriminant at each grid point
239
- std::vector<double> disc_vals(steps);
240
- for (int i = 0; i < steps; i++) {
241
- disc_vals[i] = discriminant_func(z_grid[i], beta, z_a, y_effective);
242
- }
243
 
244
- // Find sign changes (zeros)
245
- for (int i = 0; i < steps - 1; i++) {
246
- double f1 = disc_vals[i];
247
- double f2 = disc_vals[i+1];
248
 
249
- if (std::isnan(f1) || std::isnan(f2)) {
 
 
250
  continue;
251
  }
252
 
253
- if (f1 == 0.0) {
254
- roots_found.push_back(z_grid[i]);
255
- } else if (f2 == 0.0) {
256
- roots_found.push_back(z_grid[i+1]);
257
- } else if (f1 * f2 < 0) {
258
- // Binary search for zero crossing
259
- double zl = z_grid[i];
260
- double zr = z_grid[i+1];
261
- double f1_copy = f1;
262
- for (int iter = 0; iter < 50; iter++) {
263
- double mid = 0.5 * (zl + zr);
264
- double fm = discriminant_func(mid, beta, z_a, y_effective);
265
- if (fm == 0.0) {
266
- zl = zr = mid;
 
 
 
 
 
 
 
 
267
  break;
268
  }
269
- if ((fm < 0 && f1_copy < 0) || (fm > 0 && f1_copy > 0)) {
270
- zl = mid;
271
- f1_copy = fm;
 
272
  } else {
273
- zr = mid;
 
274
  }
275
  }
276
- roots_found.push_back(0.5 * (zl + zr));
 
 
 
 
277
  }
 
 
 
278
  }
279
 
280
  return roots_found;
281
  }
282
 
283
- // Sweep beta and find z bounds
284
  std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>
285
  sweep_beta_and_find_z_bounds(double z_a, double y, double z_min, double z_max,
286
  int beta_steps, int z_steps) {
@@ -288,11 +489,14 @@ sweep_beta_and_find_z_bounds(double z_a, double y, double z_min, double z_max,
288
  std::vector<double> z_min_values(beta_steps);
289
  std::vector<double> z_max_values(beta_steps);
290
 
 
 
 
291
  double beta_step = 1.0 / (beta_steps - 1);
292
  for (int i = 0; i < beta_steps; i++) {
293
  betas[i] = i * beta_step;
294
 
295
- std::vector<double> roots = find_z_at_discriminant_zero(z_a, y, betas[i], z_min, z_max, z_steps);
296
 
297
  if (roots.empty()) {
298
  z_min_values[i] = std::numeric_limits<double>::quiet_NaN();
@@ -310,7 +514,7 @@ sweep_beta_and_find_z_bounds(double z_a, double y, double z_min, double z_max,
310
  return std::make_tuple(betas, z_min_values, z_max_values);
311
  }
312
 
313
- // Compute high y curve
314
  std::vector<double> compute_high_y_curve(const std::vector<double>& betas, double z_a, double y) {
315
  double y_effective = apply_y_condition(y);
316
  size_t n = betas.size();
@@ -320,39 +524,49 @@ std::vector<double> compute_high_y_curve(const std::vector<double>& betas, doubl
320
  double denominator = 1.0 - 2.0 * a;
321
 
322
  if (std::abs(denominator) < 1e-10) {
323
- // Handle division by zero
324
  std::fill(result.begin(), result.end(), std::numeric_limits<double>::quiet_NaN());
325
  return result;
326
  }
327
 
 
 
 
 
 
328
  for (size_t i = 0; i < n; i++) {
329
  double beta = betas[i];
330
- double numerator = -4.0 * a * (a - 1.0) * y_effective * beta - 2.0 * a * y_effective - 2.0 * a * (2.0 * a - 1.0);
331
  result[i] = numerator / denominator;
332
  }
333
 
334
  return result;
335
  }
336
 
337
- // Compute alternate low expression
338
  std::vector<double> compute_alternate_low_expr(const std::vector<double>& betas, double z_a, double y) {
339
  double y_effective = apply_y_condition(y);
340
  size_t n = betas.size();
341
  std::vector<double> result(n);
342
 
 
 
 
 
 
 
343
  for (size_t i = 0; i < n; i++) {
344
  double beta = betas[i];
345
- result[i] = (z_a * y_effective * beta * (z_a - 1.0) - 2.0 * z_a * (1.0 - y_effective) - 2.0 * z_a * z_a) / (2.0 + 2.0 * z_a);
346
  }
347
 
348
  return result;
349
  }
350
 
351
- // Compute max k expression over a range of betas
352
  std::vector<double> compute_max_k_expression(const std::vector<double>& betas, double z_a, double y) {
353
  size_t n = betas.size();
354
  std::vector<double> result(n);
355
 
 
 
356
  for (size_t i = 0; i < n; i++) {
357
  result[i] = compute_theoretical_max(z_a, y, betas[i]);
358
  }
@@ -360,11 +574,12 @@ std::vector<double> compute_max_k_expression(const std::vector<double>& betas, d
360
  return result;
361
  }
362
 
363
- // Compute min t expression over a range of betas
364
  std::vector<double> compute_min_t_expression(const std::vector<double>& betas, double z_a, double y) {
365
  size_t n = betas.size();
366
  std::vector<double> result(n);
367
 
 
 
368
  for (size_t i = 0; i < n; i++) {
369
  result[i] = compute_theoretical_min(z_a, y, betas[i]);
370
  }
@@ -372,43 +587,8 @@ std::vector<double> compute_min_t_expression(const std::vector<double>& betas, d
372
  return result;
373
  }
374
 
// Numerical first and second derivatives of a sampled curve.
//
// @param curve  Curve values; curve[i] is the value at betas[i].
// @param betas  Sample positions (expected strictly increasing; spacing may be
//               non-uniform).
// @return (d1, d2), each of size betas.size():
//         - d1: central difference at interior points, one-sided difference at
//               the two end points.
//         - d2: three-point second difference at interior points; the end
//               points copy their nearest interior neighbour.
//         Entries that cannot be computed (fewer than 2 usable samples for d1,
//         fewer than 3 for d2, or indices beyond the shorter of the two
//         inputs) are left at 0.0.
std::tuple<std::vector<double>, std::vector<double>>
compute_derivatives(const std::vector<double>& curve, const std::vector<double>& betas) {
    const std::size_t n = betas.size();
    std::vector<double> d1(n, 0.0);
    std::vector<double> d2(n, 0.0);

    // Never index past the shorter input.
    const std::size_t usable = std::min(n, curve.size());
    if (usable < 2) {
        return std::make_tuple(d1, d2);
    }

    // `i + 1 < usable` instead of `i < usable - 1` keeps the bound free of
    // unsigned wrap-around.
    for (std::size_t i = 1; i + 1 < usable; ++i) {
        const double h_prev = betas[i] - betas[i - 1];
        const double h_next = betas[i + 1] - betas[i];
        const double span = betas[i + 1] - betas[i - 1];

        d1[i] = (curve[i + 1] - curve[i - 1]) / span;

        // Difference of the two one-sided slopes divided by half the span.
        // On a uniform grid of spacing h this reduces to
        // (f[i+1] - 2 f[i] + f[i-1]) / h^2.
        const double slope_next = (curve[i + 1] - curve[i]) / h_next;
        const double slope_prev = (curve[i] - curve[i - 1]) / h_prev;
        d2[i] = 2.0 * (slope_next - slope_prev) / span;
    }

    // End points: forward / backward difference.
    d1[0] = (curve[1] - curve[0]) / (betas[1] - betas[0]);
    d1[usable - 1] = (curve[usable - 1] - curve[usable - 2]) /
                     (betas[usable - 1] - betas[usable - 2]);

    if (usable > 2) {
        d2[0] = d2[1];
        d2[usable - 1] = d2[usable - 2];
    }

    return std::make_tuple(d1, d2);
}
408
-
409
- // Generate eigenvalue distribution for a specific beta
410
  std::vector<double> generate_eigenvalue_distribution(double beta, double y, double z_a, int n, int seed) {
411
- // Apply the condition for y
412
  double y_effective = apply_y_condition(y);
413
 
414
  // Set random seed
@@ -419,15 +599,26 @@ std::vector<double> generate_eigenvalue_distribution(double beta, double y, doub
419
  int p = static_cast<int>(y_effective * n);
420
 
421
  // Generate random matrix X
422
- Eigen::MatrixXd X(p, n);
423
  for (int i = 0; i < p; i++) {
424
  for (int j = 0; j < n; j++) {
425
- X(i, j) = norm(gen);
426
  }
427
  }
428
 
429
- // Compute sample covariance matrix S_n = (1/n) * X * X^T
430
- Eigen::MatrixXd S_n = (X * X.transpose()) / static_cast<double>(n);
 
 
 
 
 
 
 
 
 
 
 
431
 
432
  // Build T_n diagonal matrix
433
  int k = static_cast<int>(std::floor(beta * p));
@@ -438,24 +629,19 @@ std::vector<double> generate_eigenvalue_distribution(double beta, double y, doub
438
  // Shuffle diagonal entries
439
  std::shuffle(diags.begin(), diags.end(), gen);
440
 
441
- // Create T_n
442
- Eigen::MatrixXd T_n = Eigen::MatrixXd::Zero(p, p);
443
  for (int i = 0; i < p; i++) {
444
- T_n(i, i) = diags[i];
 
 
445
  }
446
 
447
- // Compute B_n = S_n * T_n
448
- Eigen::MatrixXd B_n = S_n * T_n;
449
-
450
- // Compute eigenvalues
451
- Eigen::EigenSolver<Eigen::MatrixXd> solver(B_n);
452
-
453
- // Extract and return real parts of eigenvalues
454
- std::vector<double> eigenvalues(p);
455
- for (int i = 0; i < p; i++) {
456
- eigenvalues[i] = solver.eigenvalues()(i).real();
457
- }
458
 
 
459
  std::sort(eigenvalues.begin(), eigenvalues.end());
460
  return eigenvalues;
461
  }
 
1
  #include <pybind11/pybind11.h>
2
  #include <pybind11/numpy.h>
3
  #include <pybind11/stl.h>
 
 
4
  #include <vector>
5
+ #include <complex>
6
  #include <cmath>
7
  #include <algorithm>
8
  #include <random>
 
14
  return y > 1.0 ? y : 1.0 / y;
15
  }
16
 
17
+ // Fast discriminant calculation
18
  double discriminant_func(double z, double beta, double z_a, double y) {
19
  double y_effective = apply_y_condition(y);
20
 
 
24
  double c = z + z_a + 1.0 - y_effective * (beta * z_a + 1.0 - beta);
25
  double d = 1.0;
26
 
27
+ // Standard formula for cubic discriminant - optimized calculation
28
+ double p1 = b*c/(6.0*a*a);
29
+ double p2 = b*b*b/(27.0*a*a*a);
30
+ double p3 = d/(2.0*a);
31
+ double term1 = p1 - p2 - p3;
32
+ term1 *= term1;
33
+
34
+ double q1 = c/(3.0*a);
35
+ double q2 = b*b/(9.0*a*a);
36
+ double term2 = q1 - q2;
37
+ term2 = term2*term2*term2;
38
+
39
+ return term1 + term2;
40
  }
41
 
42
+ // Function to compute the theoretical max value - optimized with fewer function calls
43
  double compute_theoretical_max(double a, double y, double beta) {
44
+ // Exit early if parameters would cause division by zero or other issues
45
+ if (a <= 0 || y <= 0 || beta < 0 || beta > 1) {
46
+ return 0.0;
47
+ }
48
+
49
+ // Precompute constants for the formula
50
+ double y_effective = apply_y_condition(y);
51
+ double beta_term = y_effective * beta * (a - 1);
52
+ double y_term = y_effective - 1.0;
53
+
54
+ auto f = [a, beta_term, y_term, y_effective](double k) -> double {
55
+ // Fast evaluation of the function
56
+ double ak_plus_1 = a * k + 1.0;
57
+ double numerator = beta_term * k + ak_plus_1 * (y_term * k - 1.0);
58
+ double denominator = ak_plus_1 * (k * k + k) * y_effective;
59
+ return numerator / denominator;
60
  };
61
 
62
  // Use numerical optimization to find the maximum
 
64
  double best_k = 1.0;
65
  double best_val = f(best_k);
66
 
67
+ // Initial fast grid search with fewer points
68
+ const int num_grid_points = 50; // Reduced from 200
69
  for (int i = 0; i < num_grid_points; ++i) {
70
+ double k = 0.01 + 100.0 * i / (num_grid_points - 1);
71
  double val = f(k);
72
  if (val > best_val) {
73
  best_val = val;
 
78
  // Refine with golden section search
79
  double a_gs = std::max(0.01, best_k / 10.0);
80
  double b_gs = best_k * 10.0;
81
+ const double golden_ratio = 1.618033988749895;
82
+ const double tolerance = 1e-6; // Increased from 1e-10 for speed
83
 
84
  double c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
85
  double d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
86
+ double fc = f(c_gs);
87
+ double fd = f(d_gs);
88
 
89
+ // Limited iterations for faster convergence
90
+ for (int iter = 0; iter < 20 && std::abs(b_gs - a_gs) > tolerance; ++iter) {
91
+ if (fc > fd) {
92
  b_gs = d_gs;
93
  d_gs = c_gs;
94
  c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
95
+ fd = fc;
96
+ fc = f(c_gs);
97
  } else {
98
  a_gs = c_gs;
99
  c_gs = d_gs;
100
  d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
101
+ fc = fd;
102
+ fd = f(d_gs);
103
  }
104
  }
105
 
106
  return f((a_gs + b_gs) / 2.0);
107
  }
108
 
109
+ // Function to compute the theoretical min value - optimized similarly
110
  double compute_theoretical_min(double a, double y, double beta) {
111
+ // Exit early if parameters would cause division by zero or other issues
112
+ if (a <= 0 || y <= 0 || beta < 0 || beta > 1) {
113
+ return 0.0;
114
+ }
115
+
116
+ // Precompute constants
117
+ double y_effective = apply_y_condition(y);
118
+ double beta_term = y_effective * beta * (a - 1);
119
+ double y_term = y_effective - 1.0;
120
+
121
+ auto f = [a, beta_term, y_term, y_effective](double t) -> double {
122
+ double at_plus_1 = a * t + 1.0;
123
+ double numerator = beta_term * t + at_plus_1 * (y_term * t - 1.0);
124
+ double denominator = at_plus_1 * (t * t + t) * y_effective;
125
+ return numerator / denominator;
126
  };
127
 
128
+ // Initial bound check
129
+ if (a <= 0) return 0.0;
130
+
131
+ // Find midpoint of range as starting guess
132
+ double best_t = -0.5 / a;
133
  double best_val = f(best_t);
134
 
135
  // Initial grid search over the range (-1/a, 0)
136
+ const int num_grid_points = 50; // Reduced from 200
137
+ double range = 0.998/a;
138
+ double start = -0.999/a;
139
+
140
  for (int i = 1; i < num_grid_points; ++i) {
141
+ double t = start + range * i / (num_grid_points - 1);
142
+ if (t >= 0 || t <= -1.0/a) continue;
 
143
 
144
  double val = f(t);
145
  if (val < best_val) {
 
149
  }
150
 
151
  // Refine with golden section search
152
+ double a_gs = start;
153
+ double b_gs = -0.001/a;
154
+ const double golden_ratio = 1.618033988749895;
155
+ const double tolerance = 1e-6; // Increased from 1e-10
156
 
157
  double c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
158
  double d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
159
+ double fc = f(c_gs);
160
+ double fd = f(d_gs);
161
 
162
+ // Limited iterations
163
+ for (int iter = 0; iter < 20 && std::abs(b_gs - a_gs) > tolerance; ++iter) {
164
+ if (fc < fd) {
165
  b_gs = d_gs;
166
  d_gs = c_gs;
167
  c_gs = b_gs - (b_gs - a_gs) / golden_ratio;
168
+ fd = fc;
169
+ fc = f(c_gs);
170
  } else {
171
  a_gs = c_gs;
172
  c_gs = d_gs;
173
  d_gs = a_gs + (b_gs - a_gs) / golden_ratio;
174
+ fc = fd;
175
+ fd = f(d_gs);
176
  }
177
  }
178
 
179
  return f((a_gs + b_gs) / 2.0);
180
  }
181
 
// Eigenvalues of a real symmetric matrix via the cyclic Jacobi rotation method.
//
// @param matrix       Square matrix given as n rows of n entries. It is treated
//                     as symmetric: only the diagonal and the upper triangle
//                     (row < col) are read, and the lower triangle is assumed
//                     to mirror it.
// @param eigenvalues  Output; resized to n and filled with the eigenvalues in
//                     no particular order (sort afterwards if order matters).
//
// Each rotation zeroes one off-diagonal entry; full sweeps over all (p, q)
// pairs repeat until the sum of absolute off-diagonal entries falls below
// 1e-8 or kMaxSweeps sweeps have run. If the sweep cap is reached first, the
// returned values are the current diagonal, i.e. an approximation.
void eigen_decomposition(const std::vector<std::vector<double>>& matrix,
                         std::vector<double>& eigenvalues) {
    const int n = static_cast<int>(matrix.size());
    eigenvalues.assign(matrix.size(), 0.0);
    if (n == 0) {
        return;
    }

    // Full symmetric working copy built from the upper triangle, so both
    // a[r][c] and a[c][r] are always current and the diagonal used for the
    // rotation angle is never stale.
    std::vector<std::vector<double>> a(n, std::vector<double>(n, 0.0));
    for (int i = 0; i < n; ++i) {
        a[i][i] = matrix[i][i];
        for (int j = i + 1; j < n; ++j) {
            a[i][j] = matrix[i][j];
            a[j][i] = matrix[i][j];
        }
    }

    const int kMaxSweeps = 50;
    const double kOffDiagTolerance = 1e-8;

    for (int sweep = 0; sweep < kMaxSweeps; ++sweep) {
        double off_diag_sum = 0.0;
        for (int i = 0; i < n - 1; ++i) {
            for (int j = i + 1; j < n; ++j) {
                off_diag_sum += std::abs(a[i][j]);
            }
        }
        if (off_diag_sum < kOffDiagTolerance) {
            break;
        }

        for (int p = 0; p < n - 1; ++p) {
            for (int q = p + 1; q < n; ++q) {
                const double apq = a[p][q];
                if (apq == 0.0) {
                    continue;  // already annihilated
                }

                // t = tan(phi) is the smaller-magnitude root of
                // t^2 + 2*theta*t - 1 = 0 with theta = cot(2*phi); picking the
                // smaller root keeps |phi| <= pi/4. If theta overflows to
                // infinity, t becomes 0 and the rotation is the identity.
                const double theta = (a[q][q] - a[p][p]) / (2.0 * apq);
                const double t = (theta >= 0.0 ? 1.0 : -1.0) /
                                 (std::abs(theta) + std::sqrt(theta * theta + 1.0));
                const double c = 1.0 / std::sqrt(t * t + 1.0);
                const double s = t * c;

                // Diagonal update for this rotation (applied exactly once).
                a[p][p] -= t * apq;
                a[q][q] += t * apq;
                a[p][q] = 0.0;
                a[q][p] = 0.0;

                // Rotate the remaining entries of rows/columns p and q.
                for (int r = 0; r < n; ++r) {
                    if (r == p || r == q) {
                        continue;
                    }
                    const double arp = a[r][p];
                    const double arq = a[r][q];
                    const double new_rp = c * arp - s * arq;
                    const double new_rq = s * arp + c * arq;
                    a[r][p] = new_rp;
                    a[p][r] = new_rp;
                    a[r][q] = new_rq;
                    a[q][r] = new_rq;
                }
            }
        }
    }

    for (int i = 0; i < n; ++i) {
        eigenvalues[i] = a[i][i];
    }
}
268
+
// Dense matrix product C = A * B on row-major nested vectors.
//
// @param A  Left operand, m rows of k entries (rows must all have equal length).
// @param B  Right operand, k rows of n entries (rows must all have equal length).
// @param C  Output; replaced by the m x n product regardless of its previous
//           contents or shape. C may be the same object as A or B.
//
// Empty operands are accepted and give an empty / zero-width result. If the
// inner dimensions disagree, only the overlapping min(k, B.size()) terms are
// summed so that no out-of-range element is touched; callers should treat
// that as a precondition violation.
void matrix_multiply(const std::vector<std::vector<double>>& A,
                     const std::vector<std::vector<double>>& B,
                     std::vector<std::vector<double>>& C) {
    const std::size_t rows = A.size();
    const std::size_t cols = B.empty() ? 0 : B[0].size();
    const std::size_t inner = A.empty() ? 0 : std::min(A[0].size(), B.size());

    // Transpose B once so the innermost loop walks two contiguous rows.
    std::vector<std::vector<double>> B_t(cols, std::vector<double>(inner, 0.0));
    for (std::size_t i = 0; i < inner; ++i) {
        for (std::size_t j = 0; j < cols; ++j) {
            B_t[j][i] = B[i][j];
        }
    }

    // Build the result in a fresh buffer: this guarantees every row has the
    // right width and keeps A/B intact when C aliases one of them.
    std::vector<std::vector<double>> product(rows, std::vector<double>(cols, 0.0));
    for (std::size_t i = 0; i < rows; ++i) {
        const std::vector<double>& a_row = A[i];
        for (std::size_t j = 0; j < cols; ++j) {
            const std::vector<double>& b_col = B_t[j];
            double sum = 0.0;
            for (std::size_t l = 0; l < inner; ++l) {
                sum += a_row[l] * b_col[l];
            }
            product[i][j] = sum;
        }
    }

    C.swap(product);
}
298
+
299
+ // Highly optimized eigenvalue computation for a given beta
300
  std::tuple<double, double> compute_eigenvalues_for_beta(double z_a, double y, double beta, int n, int seed) {
 
301
  double y_effective = apply_y_condition(y);
302
 
303
  // Set random seed
 
307
  // Compute dimension p based on aspect ratio y
308
  int p = static_cast<int>(y_effective * n);
309
 
310
+ // Generate random matrix X (with pre-allocation)
311
+ std::vector<std::vector<double>> X(p, std::vector<double>(n, 0.0));
312
  for (int i = 0; i < p; i++) {
313
  for (int j = 0; j < n; j++) {
314
+ X[i][j] = norm(gen);
315
  }
316
  }
317
 
318
+ // Compute X * X^T / n - optimized matrix multiplication
319
+ std::vector<std::vector<double>> S_n(p, std::vector<double>(p, 0.0));
320
+ for (int i = 0; i < p; i++) {
321
+ for (int j = 0; j <= i; j++) { // Compute only lower triangle
322
+ double sum = 0.0;
323
+ for (int k = 0; k < n; k++) {
324
+ sum += X[i][k] * X[j][k];
325
+ }
326
+ sum /= n;
327
+ S_n[i][j] = sum;
328
+ if (i != j) S_n[j][i] = sum; // Mirror to upper triangle
329
+ }
330
+ }
331
 
332
  // Build T_n diagonal matrix
333
  int k = static_cast<int>(std::floor(beta * p));
 
338
  // Shuffle diagonal entries
339
  std::shuffle(diags.begin(), diags.end(), gen);
340
 
341
+ // Create T_sqrt diagonal matrix
342
+ std::vector<double> t_sqrt_diag(p);
 
 
343
  for (int i = 0; i < p; i++) {
344
+ t_sqrt_diag[i] = std::sqrt(diags[i]);
 
 
345
  }
346
 
347
+ // Compute B = T_sqrt * S_n * T_sqrt directly without full matrix multiplication
348
+ // (optimize for diagonal T_sqrt)
349
+ std::vector<std::vector<double>> B(p, std::vector<double>(p, 0.0));
350
+ for (int i = 0; i < p; i++) {
351
+ for (int j = 0; j < p; j++) {
352
+ B[i][j] = S_n[i][j] * t_sqrt_diag[i] * t_sqrt_diag[j];
353
+ }
354
+ }
355
 
356
+ // Compute eigenvalues efficiently
357
+ std::vector<double> eigenvalues;
358
+ eigen_decomposition(B, eigenvalues);
359
 
360
+ // Sort eigenvalues
361
+ std::sort(eigenvalues.begin(), eigenvalues.end());
 
362
 
363
+ // Return min and max
364
+ return std::make_tuple(eigenvalues.front(), eigenvalues.back());
365
  }
366
 
367
+ // Fast computation of eigenvalue support boundaries
368
  std::tuple<std::vector<double>, std::vector<double>, std::vector<double>, std::vector<double>>
369
  compute_eigenvalue_support_boundaries(double z_a, double y, const std::vector<double>& beta_values,
370
  int n_samples, int seeds) {
 
374
  std::vector<double> theoretical_min_values(num_betas, 0.0);
375
  std::vector<double> theoretical_max_values(num_betas, 0.0);
376
 
377
+ // Pre-compute theoretical values for all betas (can be done in parallel)
378
+ #pragma omp parallel for if(num_betas > 10)
379
  for (size_t i = 0; i < num_betas; i++) {
380
  double beta = beta_values[i];
 
 
381
  theoretical_max_values[i] = compute_theoretical_max(z_a, y, beta);
382
  theoretical_min_values[i] = compute_theoretical_min(z_a, y, beta);
383
+ }
384
+
385
+ // Compute eigenvalues for all betas (more expensive)
386
+ for (size_t i = 0; i < num_betas; i++) {
387
+ double beta = beta_values[i];
388
 
389
  std::vector<double> min_vals;
390
  std::vector<double> max_vals;
391
 
392
+ // Use just one seed for speed if the seeds parameter is small
393
+ int actual_seeds = (seeds <= 2) ? 1 : seeds;
394
+
395
+ for (int seed = 0; seed < actual_seeds; seed++) {
396
  auto [min_eig, max_eig] = compute_eigenvalues_for_beta(z_a, y, beta, n_samples, seed);
397
  min_vals.push_back(min_eig);
398
  max_vals.push_back(max_eig);
 
403
  for (double val : min_vals) min_sum += val;
404
  for (double val : max_vals) max_sum += val;
405
 
406
+ min_eigenvalues[i] = min_sum / min_vals.size();
407
+ max_eigenvalues[i] = max_sum / max_vals.size();
408
  }
409
 
410
  return std::make_tuple(min_eigenvalues, max_eigenvalues, theoretical_min_values, theoretical_max_values);
411
  }
412
 
413
+ // Very optimized version to find zeros of discriminant
414
  std::vector<double> find_z_at_discriminant_zero(double z_a, double y, double beta,
415
  double z_min, double z_max, int steps) {
416
  std::vector<double> roots_found;
417
  double y_effective = apply_y_condition(y);
418
 
419
+ // Adaptive step size for better accuracy in important regions
420
+ double step = (z_max - z_min) / (steps - 1);
 
 
 
 
421
 
422
+ // Evaluate discriminant at first point
423
+ double z_prev = z_min;
424
+ double f_prev = discriminant_func(z_prev, beta, z_a, y_effective);
 
 
425
 
426
+ // Scan through the range looking for sign changes
427
+ for (int i = 1; i < steps; ++i) {
428
+ double z_curr = z_min + i * step;
429
+ double f_curr = discriminant_func(z_curr, beta, z_a, y_effective);
430
 
431
+ if (std::isnan(f_prev) || std::isnan(f_curr)) {
432
+ z_prev = z_curr;
433
+ f_prev = f_curr;
434
  continue;
435
  }
436
 
437
+ // Check for exact zero
438
+ if (f_prev == 0.0) {
439
+ roots_found.push_back(z_prev);
440
+ }
441
+ else if (f_curr == 0.0) {
442
+ roots_found.push_back(z_curr);
443
+ }
444
+ // Check for sign change
445
+ else if (f_prev * f_curr < 0) {
446
+ // Binary search for more precise zero
447
+ double zl = z_prev;
448
+ double zr = z_curr;
449
+ double fl = f_prev;
450
+ double fr = f_curr;
451
+
452
+ // Fewer iterations, still good precision
453
+ for (int iter = 0; iter < 20; iter++) {
454
+ double zm = (zl + zr) / 2;
455
+ double fm = discriminant_func(zm, beta, z_a, y_effective);
456
+
457
+ if (fm == 0.0 || std::abs(zr - zl) < 1e-8) {
458
+ roots_found.push_back(zm);
459
  break;
460
  }
461
+
462
+ if ((fm < 0 && fl < 0) || (fm > 0 && fl > 0)) {
463
+ zl = zm;
464
+ fl = fm;
465
  } else {
466
+ zr = zm;
467
+ fr = fm;
468
  }
469
  }
470
+
471
+ if (std::abs(zr - zl) >= 1e-8) {
472
+ // Add the midpoint if we didn't converge fully
473
+ roots_found.push_back((zl + zr) / 2);
474
+ }
475
  }
476
+
477
+ z_prev = z_curr;
478
+ f_prev = f_curr;
479
  }
480
 
481
  return roots_found;
482
  }
483
 
484
+ // Compute z bounds but with fewer steps for speed
485
  std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>
486
  sweep_beta_and_find_z_bounds(double z_a, double y, double z_min, double z_max,
487
  int beta_steps, int z_steps) {
 
489
  std::vector<double> z_min_values(beta_steps);
490
  std::vector<double> z_max_values(beta_steps);
491
 
492
+ // Use fewer z steps for faster computation
493
+ int actual_z_steps = std::min(z_steps, 10000);
494
+
495
  double beta_step = 1.0 / (beta_steps - 1);
496
  for (int i = 0; i < beta_steps; i++) {
497
  betas[i] = i * beta_step;
498
 
499
+ std::vector<double> roots = find_z_at_discriminant_zero(z_a, y, betas[i], z_min, z_max, actual_z_steps);
500
 
501
  if (roots.empty()) {
502
  z_min_values[i] = std::numeric_limits<double>::quiet_NaN();
 
514
  return std::make_tuple(betas, z_min_values, z_max_values);
515
  }
516
 
517
+ // Fast implementations of curve computations
518
  std::vector<double> compute_high_y_curve(const std::vector<double>& betas, double z_a, double y) {
519
  double y_effective = apply_y_condition(y);
520
  size_t n = betas.size();
 
524
  double denominator = 1.0 - 2.0 * a;
525
 
526
  if (std::abs(denominator) < 1e-10) {
 
527
  std::fill(result.begin(), result.end(), std::numeric_limits<double>::quiet_NaN());
528
  return result;
529
  }
530
 
531
+ // Precompute constants
532
+ double term1 = -2.0 * a * y_effective;
533
+ double term2 = -2.0 * a * (2.0 * a - 1.0);
534
+ double term3 = -4.0 * a * (a - 1.0) * y_effective;
535
+
536
  for (size_t i = 0; i < n; i++) {
537
  double beta = betas[i];
538
+ double numerator = term3 * beta + term1 + term2;
539
  result[i] = numerator / denominator;
540
  }
541
 
542
  return result;
543
  }
544
 
 
545
  std::vector<double> compute_alternate_low_expr(const std::vector<double>& betas, double z_a, double y) {
546
  double y_effective = apply_y_condition(y);
547
  size_t n = betas.size();
548
  std::vector<double> result(n);
549
 
550
+ // Precompute constants
551
+ double term1 = -2.0 * z_a * (1.0 - y_effective);
552
+ double term2 = -2.0 * z_a * z_a;
553
+ double term3 = z_a * y_effective * (z_a - 1.0);
554
+ double denominator = 2.0 + 2.0 * z_a;
555
+
556
  for (size_t i = 0; i < n; i++) {
557
  double beta = betas[i];
558
+ result[i] = (term3 * beta + term1 + term2) / denominator;
559
  }
560
 
561
  return result;
562
  }
563
 
 
564
  std::vector<double> compute_max_k_expression(const std::vector<double>& betas, double z_a, double y) {
565
  size_t n = betas.size();
566
  std::vector<double> result(n);
567
 
568
+ // Since we've optimized compute_theoretical_max, just call it in a loop
569
+ #pragma omp parallel for if(n > 20)
570
  for (size_t i = 0; i < n; i++) {
571
  result[i] = compute_theoretical_max(z_a, y, betas[i]);
572
  }
 
574
  return result;
575
  }
576
 
 
577
  std::vector<double> compute_min_t_expression(const std::vector<double>& betas, double z_a, double y) {
578
  size_t n = betas.size();
579
  std::vector<double> result(n);
580
 
581
+ // Similarly for min
582
+ #pragma omp parallel for if(n > 20)
583
  for (size_t i = 0; i < n; i++) {
584
  result[i] = compute_theoretical_min(z_a, y, betas[i]);
585
  }
 
587
  return result;
588
  }
589
 
590
+ // Generate eigenvalue distribution - faster implementation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  std::vector<double> generate_eigenvalue_distribution(double beta, double y, double z_a, int n, int seed) {
 
592
  double y_effective = apply_y_condition(y);
593
 
594
  // Set random seed
 
599
  int p = static_cast<int>(y_effective * n);
600
 
601
  // Generate random matrix X
602
+ std::vector<std::vector<double>> X(p, std::vector<double>(n, 0.0));
603
  for (int i = 0; i < p; i++) {
604
  for (int j = 0; j < n; j++) {
605
+ X[i][j] = norm(gen);
606
  }
607
  }
608
 
609
+ // Compute S_n = X * X^T / n efficiently
610
+ std::vector<std::vector<double>> S_n(p, std::vector<double>(p, 0.0));
611
+ for (int i = 0; i < p; i++) {
612
+ for (int j = 0; j <= i; j++) { // Compute only lower triangle
613
+ double sum = 0.0;
614
+ for (int k = 0; k < n; k++) {
615
+ sum += X[i][k] * X[j][k];
616
+ }
617
+ sum /= n;
618
+ S_n[i][j] = sum;
619
+ if (i != j) S_n[j][i] = sum; // Mirror to upper triangle
620
+ }
621
+ }
622
 
623
  // Build T_n diagonal matrix
624
  int k = static_cast<int>(std::floor(beta * p));
 
629
  // Shuffle diagonal entries
630
  std::shuffle(diags.begin(), diags.end(), gen);
631
 
632
+ // Compute B_n = S_n * diag(T_n) efficiently
633
+ std::vector<std::vector<double>> B_n(p, std::vector<double>(p, 0.0));
634
  for (int i = 0; i < p; i++) {
635
+ for (int j = 0; j < p; j++) {
636
+ B_n[i][j] = S_n[i][j] * diags[j];
637
+ }
638
  }
639
 
640
+ // Compute eigenvalues efficiently
641
+ std::vector<double> eigenvalues;
642
+ eigen_decomposition(B_n, eigenvalues);
 
 
 
 
 
 
 
 
643
 
644
+ // Sort eigenvalues
645
  std::sort(eigenvalues.begin(), eigenvalues.end());
646
  return eigenvalues;
647
  }