MilesCranmer committed on
Commit
0683428
1 Parent(s): bd1838a

Add warning for large numbers of datapoints

Browse files
Files changed (2) hide show
  1. pysr/sr.py +5 -0
  2. setup.py +1 -1
pysr/sr.py CHANGED
@@ -11,6 +11,7 @@ import tempfile
11
  import shutil
12
  from pathlib import Path
13
  from datetime import datetime
 
14
 
15
 
16
  global_equation_file = 'hall_of_fame.csv'
@@ -221,6 +222,10 @@ def pysr(X=None, y=None, weights=None,
221
  if use_custom_variable_names:
222
  assert len(variable_names) == X.shape[1]
223
 
 
 
 
 
224
  if select_k_features is not None:
225
  selection = run_feature_selection(X, y, select_k_features)
226
  print(f"Using features {selection}")
 
11
  import shutil
12
  from pathlib import Path
13
  from datetime import datetime
14
+ import warnings
15
 
16
 
17
  global_equation_file = 'hall_of_fame.csv'
 
222
  if use_custom_variable_names:
223
  assert len(variable_names) == X.shape[1]
224
 
225
+
226
+ if len(X) > 10000 and not batching:
227
+ warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
228
+
229
  if select_k_features is not None:
230
  selection = run_feature_selection(X, y, select_k_features)
231
  print(f"Using features {selection}")
setup.py CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
 
6
  setuptools.setup(
7
  name="pysr", # Replace with your own username
8
- version="0.3.36",
9
  author="Miles Cranmer",
10
  author_email="[email protected]",
11
  description="Simple and efficient symbolic regression",
 
5
 
6
  setuptools.setup(
7
  name="pysr", # Replace with your own username
8
+ version="0.3.37",
9
  author="Miles Cranmer",
10
  author_email="[email protected]",
11
  description="Simple and efficient symbolic regression",