tttc3 committed on
Commit
19ef535
1 Parent(s): ce60798

Fixed typos and ensured tests pass

Browse files
Files changed (1) hide show
  1. pysr/sr.py +17 -16
pysr/sr.py CHANGED
@@ -349,14 +349,14 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
349
  Relative likelihood for mutation to leave the individual.
350
 
351
  weight_mutate_constant : float, default=0.048
352
- Relative likelihood for mutation to change the constant slightly
353
  in a random direction.
354
 
355
  weight_mutate_operator : float, default=0.47
356
  Relative likelihood for mutation to swap an operator.
357
 
358
  weight_randomize : float, default=0.00023
359
- Relative likelihood for mutation to completely delete and then
360
  randomly generate the equation
361
 
362
  weight_simplify : float, default=0.0020
@@ -891,7 +891,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
891
  Raises
892
  ------
893
  ValueError
894
- Raised when on of the following occours: `tournament_selection_n`
895
  parameter is larger than `population_size`; `maxsize` is
896
  less than 7; invalid `extra_jax_mappings` or
897
  `extra_torch_mappings`; invalid optimizer algorithms.
@@ -1005,7 +1005,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1005
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1006
  Target values. Will be cast to X's dtype if necessary.
1007
 
1008
- Xresampled : {ndarray | pandas.DataFrame} of shape
1009
  (n_resampled, n_features), default=None
1010
  Resampled training data used for denoising.
1011
 
@@ -1018,7 +1018,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1018
  Validated training data.
1019
 
1020
  y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
1021
- Validatee target data.
 
 
 
1022
 
1023
  variable_names_validated : list[str] of length n_features
1024
  Validated list of variable names for each feature in `X`.
@@ -1064,7 +1067,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1064
  else:
1065
  raise NotImplementedError("y shape not supported!")
1066
 
1067
- return X, y, variable_names
1068
 
1069
  def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
1070
  """
@@ -1080,7 +1083,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1080
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1081
  Target values. Will be cast to X's dtype if necessary.
1082
 
1083
- Xresampled : {ndarray | pandas.DataFrame} of shape
1084
  (n_resampled, n_features), default=None
1085
  Resampled training data used for denoising.
1086
 
@@ -1119,9 +1122,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1119
  variable_names = [variable_names[i] for i in self.selection_mask_]
1120
 
1121
  # Re-perform data validation and feature name updating
1122
- X, y = self._validate_data(
1123
- X=X, y=y, reset=True, multi_output=True
1124
- )
1125
  # Update feature names with selected variable names
1126
  self.feature_names_in_ = _check_feature_names_in(self, variable_names)
1127
  print(f"Using features {self.feature_names_in_}")
@@ -1169,7 +1170,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1169
  ImportError
1170
  Raised when the julia backend fails to import a package.
1171
  """
1172
- # Need to be global as we don't want to recreate/reinstate julia for
1173
  # every new instance of PySRRegressor
1174
  global already_ran
1175
  global Main
@@ -1380,7 +1381,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1380
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1381
  Target values. Will be cast to X's dtype if necessary.
1382
 
1383
- Xresampled : {ndarray | pandas.DataFrame} of shape
1384
  (n_resampled, n_features), default=None
1385
  Resampled training data used for denoising.
1386
 
@@ -1413,7 +1414,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1413
 
1414
  # Parameter input validation (for parameters defined in __init__)
1415
  self._validate_params(n_samples=X.shape[0])
1416
- X, y, variable_names = self._validate_fit_params(
1417
  X, y, Xresampled, variable_names
1418
  )
1419
 
@@ -1422,7 +1423,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1422
  X, y, Xresampled, variable_names
1423
  )
1424
 
1425
- # Warn about large feature counts (still warn if feature count is large
1426
  # after running feature selection)
1427
  if self.n_features_in_ >= 10:
1428
  warnings.warn(
@@ -1516,7 +1517,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1516
  """
1517
  Predict y from input X using the equation chosen by `model_selection`.
1518
 
1519
- You may see what equation is used by printing this object. X should
1520
  have the same columns as the training data.
1521
 
1522
  Parameters
@@ -1787,7 +1788,7 @@ def _denoise(X, y, Xresampled=None):
1787
  return X, gpr.predict(X)
1788
 
1789
 
1790
- # Function hasnot been removed only due to usage in module tests
1791
  def _handle_feature_selection(X, select_k_features, y, variable_names):
1792
  if select_k_features is not None:
1793
  selection = run_feature_selection(X, y, select_k_features)
 
349
  Relative likelihood for mutation to leave the individual.
350
 
351
  weight_mutate_constant : float, default=0.048
352
+ Relative likelihood for mutation to change the constant slightly
353
  in a random direction.
354
 
355
  weight_mutate_operator : float, default=0.47
356
  Relative likelihood for mutation to swap an operator.
357
 
358
  weight_randomize : float, default=0.00023
359
+ Relative likelihood for mutation to completely delete and then
360
  randomly generate the equation
361
 
362
  weight_simplify : float, default=0.0020
 
891
  Raises
892
  ------
893
  ValueError
894
+ Raised when one of the following occurs: `tournament_selection_n`
895
  parameter is larger than `population_size`; `maxsize` is
896
  less than 7; invalid `extra_jax_mappings` or
897
  `extra_torch_mappings`; invalid optimizer algorithms.
 
1005
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1006
  Target values. Will be cast to X's dtype if necessary.
1007
 
1008
+ Xresampled : {ndarray | pandas.DataFrame} of shape
1009
  (n_resampled, n_features), default=None
1010
  Resampled training data used for denoising.
1011
 
 
1018
  Validated training data.
1019
 
1020
  y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
1021
+ Validated target data.
1022
+
1023
+ Xresampled : ndarray of shape (n_resampled, n_features)
1024
+ Validated resampled training data used for denoising.
1025
 
1026
  variable_names_validated : list[str] of length n_features
1027
  Validated list of variable names for each feature in `X`.
 
1067
  else:
1068
  raise NotImplementedError("y shape not supported!")
1069
 
1070
+ return X, y, Xresampled, variable_names
1071
 
1072
  def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
1073
  """
 
1083
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1084
  Target values. Will be cast to X's dtype if necessary.
1085
 
1086
+ Xresampled : {ndarray | pandas.DataFrame} of shape
1087
  (n_resampled, n_features), default=None
1088
  Resampled training data used for denoising.
1089
 
 
1122
  variable_names = [variable_names[i] for i in self.selection_mask_]
1123
 
1124
  # Re-perform data validation and feature name updating
1125
+ X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
 
 
1126
  # Update feature names with selected variable names
1127
  self.feature_names_in_ = _check_feature_names_in(self, variable_names)
1128
  print(f"Using features {self.feature_names_in_}")
 
1170
  ImportError
1171
  Raised when the julia backend fails to import a package.
1172
  """
1173
+ # Need to be global as we don't want to recreate/reinstate julia for
1174
  # every new instance of PySRRegressor
1175
  global already_ran
1176
  global Main
 
1381
  y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
1382
  Target values. Will be cast to X's dtype if necessary.
1383
 
1384
+ Xresampled : {ndarray | pandas.DataFrame} of shape
1385
  (n_resampled, n_features), default=None
1386
  Resampled training data used for denoising.
1387
 
 
1414
 
1415
  # Parameter input validation (for parameters defined in __init__)
1416
  self._validate_params(n_samples=X.shape[0])
1417
+ X, y, Xresampled, variable_names = self._validate_fit_params(
1418
  X, y, Xresampled, variable_names
1419
  )
1420
 
 
1423
  X, y, Xresampled, variable_names
1424
  )
1425
 
1426
+ # Warn about large feature counts (still warn if feature count is large
1427
  # after running feature selection)
1428
  if self.n_features_in_ >= 10:
1429
  warnings.warn(
 
1517
  """
1518
  Predict y from input X using the equation chosen by `model_selection`.
1519
 
1520
+ You may see what equation is used by printing this object. X should
1521
  have the same columns as the training data.
1522
 
1523
  Parameters
 
1788
  return X, gpr.predict(X)
1789
 
1790
 
1791
+ # Function has not been removed only due to usage in module tests
1792
  def _handle_feature_selection(X, select_k_features, y, variable_names):
1793
  if select_k_features is not None:
1794
  selection = run_feature_selection(X, y, select_k_features)