MilesCranmer committed on
Commit
f4c2ae8
1 Parent(s): 49ecfff

Preprocess Julia floats before pandas reading

Browse files
Files changed (1) hide show
  1. pysr/sr.py +10 -6
pysr/sr.py CHANGED
@@ -1,5 +1,6 @@
1
  """Define the PySRRegressor scikit-learn interface."""
2
  import copy
 
3
  import os
4
  import sys
5
  import numpy as np
@@ -2023,7 +2024,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2023
  cur_filename = str(self.equation_file_) + f".out{i}" + ".bkup"
2024
  if not os.path.exists(cur_filename):
2025
  cur_filename = str(self.equation_file_) + f".out{i}"
2026
- df = pd.read_csv(cur_filename)
 
 
 
 
2027
  # Rename Complexity column to complexity:
2028
  df.rename(
2029
  columns={
@@ -2033,14 +2038,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2033
  },
2034
  inplace=True,
2035
  )
2036
- df["equation"] = df["equation"].apply(_preprocess_julia_floats)
2037
 
2038
  all_outputs.append(df)
2039
  else:
2040
  filename = str(self.equation_file_) + ".bkup"
2041
  if not os.path.exists(filename):
2042
  filename = str(self.equation_file_)
2043
- all_outputs = [pd.read_csv(filename)]
 
 
 
2044
  all_outputs[-1].rename(
2045
  columns={
2046
  "Complexity": "complexity",
@@ -2049,9 +2056,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2049
  },
2050
  inplace=True,
2051
  )
2052
- all_outputs[-1]["equation"] = all_outputs[-1]["equation"].apply(
2053
- _preprocess_julia_floats
2054
- )
2055
 
2056
  except FileNotFoundError:
2057
  raise RuntimeError(
 
1
  """Define the PySRRegressor scikit-learn interface."""
2
  import copy
3
+ from io import StringIO
4
  import os
5
  import sys
6
  import numpy as np
 
2024
  cur_filename = str(self.equation_file_) + f".out{i}" + ".bkup"
2025
  if not os.path.exists(cur_filename):
2026
  cur_filename = str(self.equation_file_) + f".out{i}"
2027
+ with open(cur_filename, "r") as f:
2028
+ buf = f.read()
2029
+ buf = _preprocess_julia_floats(buf)
2030
+ df = pd.read_csv(StringIO(buf))
2031
+
2032
  # Rename Complexity column to complexity:
2033
  df.rename(
2034
  columns={
 
2038
  },
2039
  inplace=True,
2040
  )
 
2041
 
2042
  all_outputs.append(df)
2043
  else:
2044
  filename = str(self.equation_file_) + ".bkup"
2045
  if not os.path.exists(filename):
2046
  filename = str(self.equation_file_)
2047
+ with open(filename, "r") as f:
2048
+ buf = f.read()
2049
+ buf = _preprocess_julia_floats(buf)
2050
+ all_outputs = [pd.read_csv(StringIO(buf))]
2051
  all_outputs[-1].rename(
2052
  columns={
2053
  "Complexity": "complexity",
 
2056
  },
2057
  inplace=True,
2058
  )
 
 
 
2059
 
2060
  except FileNotFoundError:
2061
  raise RuntimeError(