MilesCranmer committed on
Commit
81624b2
1 Parent(s): 90d3ef7

Refactor float preprocessing

Browse files
Files changed (1) hide show
  1. pysr/sr.py +17 -13
pysr/sr.py CHANGED
@@ -2015,17 +2015,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2015
 
2016
  def _read_equation_file(self):
2017
  """Read the hall of fame file created by `SymbolicRegression.jl`."""
2018
- regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
2019
- regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
2020
- regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
2021
- apply_regexp_im = lambda x: regexp_im.sub(r"\1j", x)
2022
- apply_regexp_im_sci = lambda x: regexp_im_sci.sub(r"\1e\2j", x)
2023
- apply_regexp_sci = lambda x: regexp_sci.sub(r"\1e\2", x)
2024
-
2025
- def _replace_im(df):
2026
- df["equation"] = df["equation"].apply(
2027
- lambda x: apply_regexp_sci(apply_regexp_im_sci(apply_regexp_im(x)))
2028
- )
2029
 
2030
  try:
2031
  if self.nout_ > 1:
@@ -2044,7 +2033,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2044
  },
2045
  inplace=True,
2046
  )
2047
- _replace_im(df)
2048
 
2049
  all_outputs.append(df)
2050
  else:
@@ -2060,7 +2049,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2060
  },
2061
  inplace=True,
2062
  )
2063
- _replace_im(all_outputs[-1])
 
 
2064
 
2065
  except FileNotFoundError:
2066
  raise RuntimeError(
@@ -2352,3 +2343,16 @@ def _csv_filename_to_pkl_filename(csv_filename) -> str:
2352
  pkl_basename = base + ".pkl"
2353
 
2354
  return os.path.join(dirname, pkl_basename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2015
 
2016
  def _read_equation_file(self):
2017
  """Read the hall of fame file created by `SymbolicRegression.jl`."""
 
 
 
 
 
 
 
 
 
 
 
2018
 
2019
  try:
2020
  if self.nout_ > 1:
 
2033
  },
2034
  inplace=True,
2035
  )
2036
+ df["equation"] = df["equation"].apply(_preprocess_julia_floats)
2037
 
2038
  all_outputs.append(df)
2039
  else:
 
2049
  },
2050
  inplace=True,
2051
  )
2052
+ all_outputs[-1]["equation"] = all_outputs[-1]["equation"].apply(
2053
+ _preprocess_julia_floats
2054
+ )
2055
 
2056
  except FileNotFoundError:
2057
  raise RuntimeError(
 
2343
  pkl_basename = base + ".pkl"
2344
 
2345
  return os.path.join(dirname, pkl_basename)
2346
+
2347
+
2348
+ _regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
2349
+ _regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
2350
+ _regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
2351
+
2352
+ _apply_regexp_im = lambda x: _regexp_im.sub(r"\1j", x)
2353
+ _apply_regexp_im_sci = lambda x: _regexp_im_sci.sub(r"\1e\2j", x)
2354
+ _apply_regexp_sci = lambda x: _regexp_sci.sub(r"\1e\2", x)
2355
+
2356
+
2357
+ def _preprocess_julia_floats(s: str) -> str:
2358
+ return _apply_regexp_sci(_apply_regexp_im_sci(_apply_regexp_im(s)))