MilesCranmer committed on
Commit
b31f594
·
unverified ·
1 Parent(s): bce8e64

Clean up colab notebook

Browse files
Files changed (1) hide show
  1. examples/pysr_demo.ipynb +35 -36
examples/pysr_demo.ipynb CHANGED
@@ -109,11 +109,11 @@
109
  "source": [
110
  "from julia import Julia\n",
111
  "\n",
112
- "julia = Julia(compiled_modules=False, threads='auto', optimize=3)\n",
113
  "from julia import Main\n",
114
  "from julia.tools import redirect_output_streams\n",
115
  "\n",
116
- "redirect_output_streams()\n"
117
  ]
118
  },
119
  {
@@ -137,7 +137,8 @@
137
  "source": [
138
  "import pysr\n",
139
  "\n",
140
- "pysr.install(precompile=False)\n"
 
141
  ]
142
  },
143
  {
@@ -157,7 +158,7 @@
157
  "from torch.nn import functional as F\n",
158
  "from torch.utils.data import DataLoader, TensorDataset\n",
159
  "import pytorch_lightning as pl\n",
160
- "from sklearn.model_selection import train_test_split\n"
161
  ]
162
  },
163
  {
@@ -191,7 +192,7 @@
191
  "# Dataset\n",
192
  "np.random.seed(0)\n",
193
  "X = 2 * np.random.randn(100, 5)\n",
194
- "y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 2\n"
195
  ]
196
  },
197
  {
@@ -215,7 +216,7 @@
215
  " populations=30,\n",
216
  " procs=4,\n",
217
  " model_selection=\"best\",\n",
218
- ")\n"
219
  ]
220
  },
221
  {
@@ -246,7 +247,7 @@
246
  " **default_pysr_params\n",
247
  ")\n",
248
  "\n",
249
- "model.fit(X, y)\n"
250
  ]
251
  },
252
  {
@@ -266,7 +267,7 @@
266
  },
267
  "outputs": [],
268
  "source": [
269
- "model\n"
270
  ]
271
  },
272
  {
@@ -286,7 +287,7 @@
286
  },
287
  "outputs": [],
288
  "source": [
289
- "model.sympy()\n"
290
  ]
291
  },
292
  {
@@ -306,7 +307,7 @@
306
  },
307
  "outputs": [],
308
  "source": [
309
- "model.sympy(2)\n"
310
  ]
311
  },
312
  {
@@ -335,7 +336,7 @@
335
  },
336
  "outputs": [],
337
  "source": [
338
- "model.latex()\n"
339
  ]
340
  },
341
  {
@@ -361,7 +362,7 @@
361
  "ypredict_simpler = model.predict(X, 2)\n",
362
  "\n",
363
  "print(\"Default selection MSE:\", np.power(ypredict - y, 2).mean())\n",
364
- "print(\"Manual selection MSE for index 2:\", np.power(ypredict_simpler - y, 2).mean())\n"
365
  ]
366
  },
367
  {
@@ -395,7 +396,7 @@
395
  },
396
  "outputs": [],
397
  "source": [
398
- "y = X[:, 0] ** 4 - 2\n"
399
  ]
400
  },
401
  {
@@ -425,7 +426,7 @@
425
  " unary_operators=[\"cos\", \"exp\", \"sin\", \"quart(x) = x^4\"],\n",
426
  " extra_sympy_mappings={\"quart\": lambda x: x**4},\n",
427
  ")\n",
428
- "model.fit(X, y)\n"
429
  ]
430
  },
431
  {
@@ -436,7 +437,7 @@
436
  },
437
  "outputs": [],
438
  "source": [
439
- "model.sympy()\n"
440
  ]
441
  },
442
  {
@@ -538,7 +539,7 @@
538
  "X = 2 * np.random.rand(N, 5)\n",
539
  "sigma = np.random.rand(N) * (5 - 0.1) + 0.1\n",
540
  "eps = sigma * np.random.randn(N)\n",
541
- "y = 5 * np.cos(3.5 * X[:, 0]) - 1.3 + eps\n"
542
  ]
543
  },
544
  {
@@ -560,7 +561,7 @@
560
  "source": [
561
  "plt.scatter(X[:, 0], y, alpha=0.2)\n",
562
  "plt.xlabel(\"$x_0$\")\n",
563
- "plt.ylabel(\"$y$\")\n"
564
  ]
565
  },
566
  {
@@ -580,7 +581,7 @@
580
  },
581
  "outputs": [],
582
  "source": [
583
- "weights = 1 / sigma ** 2\n"
584
  ]
585
  },
586
  {
@@ -591,7 +592,7 @@
591
  },
592
  "outputs": [],
593
  "source": [
594
- "weights[:5]\n"
595
  ]
596
  },
597
  {
@@ -619,7 +620,7 @@
619
  " binary_operators=[\"plus\", \"mult\"],\n",
620
  " unary_operators=[\"cos\"],\n",
621
  ")\n",
622
- "model.fit(X, y, weights=weights)\n"
623
  ]
624
  },
625
  {
@@ -639,7 +640,7 @@
639
  },
640
  "outputs": [],
641
  "source": [
642
- "model\n"
643
  ]
644
  },
645
  {
@@ -662,7 +663,7 @@
662
  "best_idx = model.equations_.query(\n",
663
  " f\"loss < {2 * model.equations_.loss.min()}\"\n",
664
  ").score.idxmax()\n",
665
- "model.sympy(best_idx)\n"
666
  ]
667
  },
668
  {
@@ -693,7 +694,7 @@
693
  "source": [
694
  "plt.scatter(X[:, 0], y, alpha=0.1)\n",
695
  "y_prediction = model.predict(X, index=best_idx)\n",
696
- "plt.scatter(X[:, 0], y_prediction)\n"
697
  ]
698
  },
699
  {
@@ -719,7 +720,7 @@
719
  "outputs": [],
720
  "source": [
721
  "X = 2 * np.random.randn(100, 5)\n",
722
- "y = 1 / X[:, [0, 1, 2]]\n"
723
  ]
724
  },
725
  {
@@ -1024,7 +1025,7 @@
1024
  "y_i = X[..., 0] ** 2 + 6 * np.cos(2 * X[..., 2])\n",
1025
  "y = np.sum(y_i, axis=1) / y_i.shape[1]\n",
1026
  "z = y**2\n",
1027
- "X.shape, y.shape\n"
1028
  ]
1029
  },
1030
  {
@@ -1117,7 +1118,7 @@
1117
  " ),\n",
1118
  " \"interval\": \"step\",\n",
1119
  " }\n",
1120
- " return [optimizer], [scheduler]\n"
1121
  ]
1122
  },
1123
  {
@@ -1152,7 +1153,7 @@
1152
  "train_set = TensorDataset(X_train, z_train)\n",
1153
  "train = DataLoader(train_set, batch_size=128, num_workers=2)\n",
1154
  "test_set = TensorDataset(X_test, z_test)\n",
1155
- "test = DataLoader(test_set, batch_size=256, num_workers=2)\n"
1156
  ]
1157
  },
1158
  {
@@ -1184,7 +1185,7 @@
1184
  "pl.seed_everything(0)\n",
1185
  "model = SumNet()\n",
1186
  "model.total_steps = total_steps\n",
1187
- "model.max_lr = 1e-2\n"
1188
  ]
1189
  },
1190
  {
@@ -1204,7 +1205,7 @@
1204
  },
1205
  "outputs": [],
1206
  "source": [
1207
- "trainer = pl.Trainer(max_steps=total_steps, gpus=1, benchmark=True)\n"
1208
  ]
1209
  },
1210
  {
@@ -1224,7 +1225,7 @@
1224
  },
1225
  "outputs": [],
1226
  "source": [
1227
- "trainer.fit(model, train_dataloaders=train, val_dataloaders=test)\n"
1228
  ]
1229
  },
1230
  {
@@ -1254,7 +1255,7 @@
1254
  "y_for_pysr = torch.sum(y_i_for_pysr, dim=1) / y_i_for_pysr.shape[1]\n",
1255
  "z_for_pysr = zt[idx] # Use true values.\n",
1256
  "\n",
1257
- "X_for_pysr.shape, y_i_for_pysr.shape\n"
1258
  ]
1259
  },
1260
  {
@@ -1287,7 +1288,7 @@
1287
  " binary_operators=[\"plus\", \"sub\", \"mult\"],\n",
1288
  " unary_operators=[\"cos\", \"square\", \"neg\"],\n",
1289
  ")\n",
1290
- "model.fit(X=tmpX[idx2], y=tmpy[idx2])\n"
1291
  ]
1292
  },
1293
  {
@@ -1319,7 +1320,7 @@
1319
  },
1320
  "outputs": [],
1321
  "source": [
1322
- "model\n"
1323
  ]
1324
  },
1325
  {
@@ -1375,9 +1376,7 @@
1375
  },
1376
  "gpuClass": "standard",
1377
  "kernelspec": {
1378
- "display_name": "Python (main_ipynb)",
1379
- "language": "python",
1380
- "name": "main_ipynb"
1381
  },
1382
  "language_info": {
1383
  "name": "python",
 
109
  "source": [
110
  "from julia import Julia\n",
111
  "\n",
112
+ "julia = Julia(compiled_modules=False, threads='auto')\n",
113
  "from julia import Main\n",
114
  "from julia.tools import redirect_output_streams\n",
115
  "\n",
116
+ "redirect_output_streams()"
117
  ]
118
  },
119
  {
 
137
  "source": [
138
  "import pysr\n",
139
  "\n",
140
+ "# We don't precompile in colab because compiled modules are incompatible with static Python libraries:\n",
141
+ "pysr.install(precompile=False)"
142
  ]
143
  },
144
  {
 
158
  "from torch.nn import functional as F\n",
159
  "from torch.utils.data import DataLoader, TensorDataset\n",
160
  "import pytorch_lightning as pl\n",
161
+ "from sklearn.model_selection import train_test_split"
162
  ]
163
  },
164
  {
 
192
  "# Dataset\n",
193
  "np.random.seed(0)\n",
194
  "X = 2 * np.random.randn(100, 5)\n",
195
+ "y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 2"
196
  ]
197
  },
198
  {
 
216
  " populations=30,\n",
217
  " procs=4,\n",
218
  " model_selection=\"best\",\n",
219
+ ")"
220
  ]
221
  },
222
  {
 
247
  " **default_pysr_params\n",
248
  ")\n",
249
  "\n",
250
+ "model.fit(X, y)"
251
  ]
252
  },
253
  {
 
267
  },
268
  "outputs": [],
269
  "source": [
270
+ "model"
271
  ]
272
  },
273
  {
 
287
  },
288
  "outputs": [],
289
  "source": [
290
+ "model.sympy()"
291
  ]
292
  },
293
  {
 
307
  },
308
  "outputs": [],
309
  "source": [
310
+ "model.sympy(2)"
311
  ]
312
  },
313
  {
 
336
  },
337
  "outputs": [],
338
  "source": [
339
+ "model.latex()"
340
  ]
341
  },
342
  {
 
362
  "ypredict_simpler = model.predict(X, 2)\n",
363
  "\n",
364
  "print(\"Default selection MSE:\", np.power(ypredict - y, 2).mean())\n",
365
+ "print(\"Manual selection MSE for index 2:\", np.power(ypredict_simpler - y, 2).mean())"
366
  ]
367
  },
368
  {
 
396
  },
397
  "outputs": [],
398
  "source": [
399
+ "y = X[:, 0] ** 4 - 2"
400
  ]
401
  },
402
  {
 
426
  " unary_operators=[\"cos\", \"exp\", \"sin\", \"quart(x) = x^4\"],\n",
427
  " extra_sympy_mappings={\"quart\": lambda x: x**4},\n",
428
  ")\n",
429
+ "model.fit(X, y)"
430
  ]
431
  },
432
  {
 
437
  },
438
  "outputs": [],
439
  "source": [
440
+ "model.sympy()"
441
  ]
442
  },
443
  {
 
539
  "X = 2 * np.random.rand(N, 5)\n",
540
  "sigma = np.random.rand(N) * (5 - 0.1) + 0.1\n",
541
  "eps = sigma * np.random.randn(N)\n",
542
+ "y = 5 * np.cos(3.5 * X[:, 0]) - 1.3 + eps"
543
  ]
544
  },
545
  {
 
561
  "source": [
562
  "plt.scatter(X[:, 0], y, alpha=0.2)\n",
563
  "plt.xlabel(\"$x_0$\")\n",
564
+ "plt.ylabel(\"$y$\")"
565
  ]
566
  },
567
  {
 
581
  },
582
  "outputs": [],
583
  "source": [
584
+ "weights = 1 / sigma ** 2"
585
  ]
586
  },
587
  {
 
592
  },
593
  "outputs": [],
594
  "source": [
595
+ "weights[:5]"
596
  ]
597
  },
598
  {
 
620
  " binary_operators=[\"plus\", \"mult\"],\n",
621
  " unary_operators=[\"cos\"],\n",
622
  ")\n",
623
+ "model.fit(X, y, weights=weights)"
624
  ]
625
  },
626
  {
 
640
  },
641
  "outputs": [],
642
  "source": [
643
+ "model"
644
  ]
645
  },
646
  {
 
663
  "best_idx = model.equations_.query(\n",
664
  " f\"loss < {2 * model.equations_.loss.min()}\"\n",
665
  ").score.idxmax()\n",
666
+ "model.sympy(best_idx)"
667
  ]
668
  },
669
  {
 
694
  "source": [
695
  "plt.scatter(X[:, 0], y, alpha=0.1)\n",
696
  "y_prediction = model.predict(X, index=best_idx)\n",
697
+ "plt.scatter(X[:, 0], y_prediction)"
698
  ]
699
  },
700
  {
 
720
  "outputs": [],
721
  "source": [
722
  "X = 2 * np.random.randn(100, 5)\n",
723
+ "y = 1 / X[:, [0, 1, 2]]"
724
  ]
725
  },
726
  {
 
1025
  "y_i = X[..., 0] ** 2 + 6 * np.cos(2 * X[..., 2])\n",
1026
  "y = np.sum(y_i, axis=1) / y_i.shape[1]\n",
1027
  "z = y**2\n",
1028
+ "X.shape, y.shape"
1029
  ]
1030
  },
1031
  {
 
1118
  " ),\n",
1119
  " \"interval\": \"step\",\n",
1120
  " }\n",
1121
+ " return [optimizer], [scheduler]"
1122
  ]
1123
  },
1124
  {
 
1153
  "train_set = TensorDataset(X_train, z_train)\n",
1154
  "train = DataLoader(train_set, batch_size=128, num_workers=2)\n",
1155
  "test_set = TensorDataset(X_test, z_test)\n",
1156
+ "test = DataLoader(test_set, batch_size=256, num_workers=2)"
1157
  ]
1158
  },
1159
  {
 
1185
  "pl.seed_everything(0)\n",
1186
  "model = SumNet()\n",
1187
  "model.total_steps = total_steps\n",
1188
+ "model.max_lr = 1e-2"
1189
  ]
1190
  },
1191
  {
 
1205
  },
1206
  "outputs": [],
1207
  "source": [
1208
+ "trainer = pl.Trainer(max_steps=total_steps, gpus=1, benchmark=True)"
1209
  ]
1210
  },
1211
  {
 
1225
  },
1226
  "outputs": [],
1227
  "source": [
1228
+ "trainer.fit(model, train_dataloaders=train, val_dataloaders=test)"
1229
  ]
1230
  },
1231
  {
 
1255
  "y_for_pysr = torch.sum(y_i_for_pysr, dim=1) / y_i_for_pysr.shape[1]\n",
1256
  "z_for_pysr = zt[idx] # Use true values.\n",
1257
  "\n",
1258
+ "X_for_pysr.shape, y_i_for_pysr.shape"
1259
  ]
1260
  },
1261
  {
 
1288
  " binary_operators=[\"plus\", \"sub\", \"mult\"],\n",
1289
  " unary_operators=[\"cos\", \"square\", \"neg\"],\n",
1290
  ")\n",
1291
+ "model.fit(X=tmpX[idx2], y=tmpy[idx2])"
1292
  ]
1293
  },
1294
  {
 
1320
  },
1321
  "outputs": [],
1322
  "source": [
1323
+ "model"
1324
  ]
1325
  },
1326
  {
 
1376
  },
1377
  "gpuClass": "standard",
1378
  "kernelspec": {
1379
+ "language": "python"
 
 
1380
  },
1381
  "language_info": {
1382
  "name": "python",