cyrusyc committed
Commit ef47233
1 Parent(s): fca8b0e

fine tune eos alloy flow

examples/eos_alloy/run_Fe-Ni-Cr.ipynb CHANGED
@@ -80,7 +80,7 @@
   "from prefect_dask import DaskTaskRunner\n",
   "\n",
   "from mlip_arena.models import REGISTRY\n",
- "from mlip_arena.tasks.eos_alloy.flow import run_from_db"
+ "from mlip_arena.tasks.eos_alloy.flow import run as EOS_ALLOY"
   ]
   },
   {
@@ -96,7 +96,7 @@
   "text": [
   "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/lib/python3.11/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.\n",
   "Perhaps you already have a cluster running?\n",
- "Hosting the HTTP server on port 36753 instead\n",
+ "Hosting the HTTP server on port 36141 instead\n",
   " warnings.warn(\n"
   ]
   },
@@ -110,7 +110,7 @@
   "#SBATCH --mem=0\n",
   "#SBATCH -t 00:30:00\n",
   "#SBATCH -J eos\n",
- "#SBATCH -q regular\n",
+ "#SBATCH -q debug\n",
   "#SBATCH -N 1\n",
   "#SBATCH -C gpu\n",
   "#SBATCH -G 4\n",
@@ -118,18 +118,18 @@
   "source ~/.bashrc\n",
   "module load python\n",
   "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena\n",
- "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.21:42119 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60\n",
+ "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.21:41671 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60\n",
   "\n"
   ]
   },
   {
   "data": {
   "text/html": [
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">01:28:01.483 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.engine - Created flow run<span style=\"color: #800080; text-decoration-color: #800080\"> 'maroon-seagull'</span> for flow<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> 'run-from-db'</span>\n",
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">16:10:48.738 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.engine - Created flow run<span style=\"color: #800080; text-decoration-color: #800080\"> 'azure-roadrunner'</span> for flow<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> 'EOS Alloy'</span>\n",
   "</pre>\n"
   ],
   "text/plain": [
- "01:28:01.483 | \u001b[36mINFO\u001b[0m | prefect.engine - Created flow run\u001b[35m 'maroon-seagull'\u001b[0m for flow\u001b[1;35m 'run-from-db'\u001b[0m\n"
+ "16:10:48.738 | \u001b[36mINFO\u001b[0m | prefect.engine - Created flow run\u001b[35m 'azure-roadrunner'\u001b[0m for flow\u001b[1;35m 'EOS Alloy'\u001b[0m\n"
   ]
   },
   "metadata": {},
@@ -138,11 +138,11 @@
   {
   "data": {
   "text/html": [
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">01:28:01.487 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.engine - View at <span style=\"color: #0000ff; text-decoration-color: #0000ff\">https://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/3e9f3df7-6054-4f2e-b81c-8b7735c168fe</span>\n",
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">16:10:48.742 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.engine - View at <span style=\"color: #0000ff; text-decoration-color: #0000ff\">https://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/3c90a68c-fc1b-473a-a0e2-42212e6b1925</span>\n",
   "</pre>\n"
   ],
   "text/plain": [
- "01:28:01.487 | \u001b[36mINFO\u001b[0m | prefect.engine - View at \u001b[94mhttps://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/3e9f3df7-6054-4f2e-b81c-8b7735c168fe\u001b[0m\n"
+ "16:10:48.742 | \u001b[36mINFO\u001b[0m | prefect.engine - View at \u001b[94mhttps://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/3c90a68c-fc1b-473a-a0e2-42212e6b1925\u001b[0m\n"
   ]
   },
   "metadata": {},
@@ -151,11 +151,11 @@
   {
   "data": {
   "text/html": [
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">01:28:02.091 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(ae948a05, 'tcp://128.55.64.21:42119', workers=0, threads=0, memory=0 B)\n",
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">16:10:49.296 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(edf7e3ce, 'tcp://128.55.64.21:41671', workers=0, threads=0, memory=0 B)\n",
   "</pre>\n"
   ],
   "text/plain": [
- "01:28:02.091 | \u001b[36mINFO\u001b[0m | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(ae948a05, 'tcp://128.55.64.21:42119', workers=0, threads=0, memory=0 B)\n"
+ "16:10:49.296 | \u001b[36mINFO\u001b[0m | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(edf7e3ce, 'tcp://128.55.64.21:41671', workers=0, threads=0, memory=0 B)\n"
   ]
   },
   "metadata": {},
@@ -164,11 +164,24 @@
   {
   "data": {
   "text/html": [
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">01:29:21.579 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | Task run 'get_atoms_from_db-3a9' - Created task run 'get_atoms_from_db-3a9' for task 'get_atoms_from_db'\n",
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">16:10:49.329 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | Task run 'get_atoms_from_db-656' - Created task run 'get_atoms_from_db-656' for task 'get_atoms_from_db'\n",
   "</pre>\n"
   ],
   "text/plain": [
- "01:29:21.579 | \u001b[36mINFO\u001b[0m | Task run 'get_atoms_from_db-3a9' - Created task run 'get_atoms_from_db-3a9' for task 'get_atoms_from_db'\n"
+ "16:10:49.329 | \u001b[36mINFO\u001b[0m | Task run 'get_atoms_from_db-656' - Created task run 'get_atoms_from_db-656' for task 'get_atoms_from_db'\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">16:11:17.219 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span> | Task run 'get_atoms_from_db-656' - Finished in state <span style=\"color: #008000; text-decoration-color: #008000\">Completed</span>()\n",
+ "</pre>\n"
+ ],
+ "text/plain": [
+ "16:11:17.219 | \u001b[36mINFO\u001b[0m | Task run 'get_atoms_from_db-656' - Finished in state \u001b[32mCompleted\u001b[0m()\n"
   ]
   },
   "metadata": {},
@@ -198,7 +211,7 @@
   " job_directives_skip=[\"-n\", \"--cpus-per-task\", \"-J\"],\n",
   " job_extra_directives=[\n",
   " \"-J eos\",\n",
- " \"-q regular\",\n",
+ " \"-q debug\",\n",
   " f\"-N {nodes_per_alloc}\",\n",
   " \"-C gpu\",\n",
   " f\"-G {gpus_per_alloc}\",\n",
@@ -208,18 +221,19 @@
   "\n",
   "cluster = SLURMCluster(**cluster_kwargs)\n",
   "print(cluster.job_script())\n",
- "cluster.adapt(minimum_jobs=50, maximum_jobs=50)\n",
+ "cluster.adapt(minimum_jobs=2, maximum_jobs=2)\n",
   "client = Client(cluster)\n",
   "\n",
- "run_from_db_ = run_from_db.with_options(\n",
- " task_runner=DaskTaskRunner(address=client.scheduler.address),\n",
- " # log_prints=True,\n",
- ")\n",
- "\n",
   "# Run the workflow\n",
   "\n",
- "results = run_from_db_(\n",
- " db_path=\"sqs_Fe-Ni-Cr.db\", out_path=\"eos.h5\", table_name=\"Fe-Ni-Cr\"\n",
+ "results = EOS_ALLOY.with_options(\n",
+ " task_runner=DaskTaskRunner(address=client.scheduler.address),\n",
+ " # log_prints=True,\n",
+ ")(\n",
+ " db_path=\"sqs_Fe-Ni-Cr.db\", \n",
+ " out_path=\"eos.h5\", \n",
+ " table_name=\"Fe-Ni-Cr\",\n",
+ " cache=False,\n",
   ")"
   ]
   },
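The notebook now drives the renamed flow through a dask-jobqueue SLURMCluster and Prefect's DaskTaskRunner. Below is a minimal, self-contained sketch of that driver pattern, assuming the flow signature shown in this diff (db_path, out_path, table_name, cache); the cluster_kwargs values are illustrative placeholders and do not reproduce the notebook's actual SLURM directives.

# Minimal driver sketch; cluster_kwargs here is a hypothetical stand-in for the
# notebook's SLURM configuration (-q debug, -C gpu, job_extra_directives, ...).
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.tasks.eos_alloy.flow import run as EOS_ALLOY

cluster_kwargs = dict(
    cores=1,
    processes=1,
    memory="60GB",
    walltime="00:30:00",
    queue="debug",  # assumption: queue name mirrors the "-q debug" directive above
)

cluster = SLURMCluster(**cluster_kwargs)
cluster.adapt(minimum_jobs=2, maximum_jobs=2)  # keep two SLURM allocations alive
client = Client(cluster)

# Attach a DaskTaskRunner so EOS tasks fan out onto the SLURM-backed Dask workers;
# cache=False asks the flow to refresh (not reuse or persist) cached task results.
results = EOS_ALLOY.with_options(
    task_runner=DaskTaskRunner(address=client.scheduler.address),
)(
    db_path="sqs_Fe-Ni-Cr.db",
    out_path="eos.h5",
    table_name="Fe-Ni-Cr",
    cache=False,
)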
mlip_arena/tasks/eos.py CHANGED
@@ -56,6 +56,7 @@ def run(
      max_abs_strain: float = 0.1,
      npoints: int = 11,
      concurrent: bool = True,
+     persist_opt: bool = True,
      cache_opt: bool = True,
  ) -> dict[str, Any] | State:
      """
@@ -81,7 +82,8 @@
      """
  
      OPT_ = OPT.with_options(
-         refresh_cache=not cache_opt
+         refresh_cache=not cache_opt,
+         persist_result=persist_opt,
      )
  
      state = OPT_(
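The new persist_opt flag is passed through to Prefect alongside the existing cache_opt flag: cache_opt maps to refresh_cache (force a fresh run instead of reusing a cached result) while persist_opt maps to persist_result (whether the task result is written to result storage). A small sketch of the same with_options pattern, using a hypothetical relax task as a stand-in for the real OPT task:

# Sketch of the pattern used above; `relax` and `run_relax` are illustrative names.
from prefect import task

@task(persist_result=True)
def relax(structure):
    ...  # placeholder for the actual geometry optimization

def run_relax(structure, cache_opt: bool = True, persist_opt: bool = True):
    # refresh_cache=True bypasses any cached result; persist_result controls
    # whether the task run's result is stored for later retrieval.
    relax_ = relax.with_options(
        refresh_cache=not cache_opt,
        persist_result=persist_opt,
    )
    return relax_(structure)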
mlip_arena/tasks/eos_alloy/flow.py CHANGED
@@ -88,8 +88,10 @@ def save_to_hdf(
          print(e)
  
  
- @flow
- def run_from_db(
+ @flow(
+     name="EOS Alloy"
+ )
+ def run(
      db_path: Path | str,
      out_path: Path | str,
      table_name: str,
@@ -100,12 +102,14 @@ def run_from_db(
      criterion=dict(fmax=0.1, steps=1000),
      max_abs_strain=0.20,
      concurrent=False,
+     cache=True,
  ):
      EOS_ = EOS.with_options(
-         on_completion=[partial(save_to_hdf, fpath=out_path, table_name=table_name)]
+         on_completion=[partial(save_to_hdf, fpath=out_path, table_name=table_name)],
+         refresh_cache=not cache,
      )
  
-     states = []
+     futures = []
      for atoms in get_atoms_from_db(db_path):
          for mlip in MLIPEnum:
              if not REGISTRY[mlip.name]["npt"]:
@@ -115,7 +119,7 @@
                  + REGISTRY[mlip.name].get("gpu-tasks", [])
              ):
                  continue
-             state = EOS_.submit(
+             future = EOS_.submit(
                  atoms=atoms,
                  calculator_name=mlip.name,
                  calculator_kwargs=dict(),
@@ -126,15 +130,16 @@
                  criterion=criterion,
                  max_abs_strain=max_abs_strain,
                  concurrent=concurrent,
-                 cache_opt=False,
-                 return_state=True
+                 persist_opt=cache,
+                 cache_opt=cache,
+                 # return_state=True
              )
-             states.append(state)
+             futures.append(future)
  
-     wait(states)
+     wait(futures)
  
      return [
-         s.result(timeout=None, raise_on_failure=False)
-         for s in states
-         if s.is_completed()
+         f.result(timeout=None, raise_on_failure=False)
+         for f in futures
+         if f.state.is_completed()
      ]
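The refactored flow keeps the submitted PrefectFutures, blocks on wait(futures), and then gathers results only from futures whose final state is completed, so a single failed structure/MLIP combination no longer aborts the whole run. A condensed sketch of that submit/wait/collect pattern, with a hypothetical compute task standing in for EOS; wait is assumed to come from prefect.futures, matching its use in this module:

# Condensed submit/wait/collect sketch; `compute` is not part of mlip_arena.
from prefect import flow, task
from prefect.futures import wait

@task
def compute(x: int) -> int:
    return x * x

@flow(name="EOS Alloy (pattern sketch)")
def run_all(xs: list[int]):
    futures = [compute.submit(x) for x in xs]  # fan out onto the task runner
    wait(futures)                              # block until every future resolves
    # Keep only completed results; failed runs are skipped instead of raising.
    return [
        f.result(raise_on_failure=False)
        for f in futures
        if f.state.is_completed()
    ]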