cyrusyc committed on
Commit
22f0dbc
·
1 Parent(s): 861ff07

update visual; add captions

Browse files
mlip_arena/models/externals/orb.py CHANGED
@@ -20,7 +20,7 @@ class ORB(ORBCalculator):
20
 
21
  cache_dir = Path.home() / ".cache" / "orb"
22
  cache_dir.mkdir(parents=True, exist_ok=True)
23
- ckpt_path = cache_dir / "orbff-v1-20240827.ckpt"
24
 
25
  url = f"https://storage.googleapis.com/orbitalmaterials-public-models/forcefields/{checkpoint}"
26
 
 
20
 
21
  cache_dir = Path.home() / ".cache" / "orb"
22
  cache_dir.mkdir(parents=True, exist_ok=True)
23
+ ckpt_path = cache_dir / checkpoint
24
 
25
  url = f"https://storage.googleapis.com/orbitalmaterials-public-models/forcefields/{checkpoint}"
26
 
serve/tasks/combustion.py CHANGED
@@ -72,12 +72,6 @@ def get_data(models):
72
 
73
  df = get_data(models)
74
 
75
- # families = [MODELS[str(model)]["family"] for model in models]
76
-
77
- # dfs = [pd.read_json(DATA_DIR / family.lower() / "hydrogen.json") for family in families]
78
- # df = pd.concat(dfs, ignore_index=True)
79
- # df.drop_duplicates(inplace=True, subset=["formula", "method"])
80
-
81
  method_color_mapping = {
82
  method: color_sequence[i % len(color_sequence)]
83
  for i, method in enumerate(df["method"].unique())
 
72
 
73
  df = get_data(models)
74
 
 
 
 
 
 
 
75
  method_color_mapping = {
76
  method: color_sequence[i % len(color_sequence)]
77
  for i, method in enumerate(df["method"].unique())
serve/tasks/stability.py CHANGED
@@ -17,20 +17,22 @@ st.markdown("""
17
  # High Pressure Stability
18
 
19
  Stable and accurate molecular dynamics (MD) simulations are important for understanding the properties of matter.
20
- However, many MLIPs have unphysical potential energy surface (PES) at the short-range interatomic distances or
21
- under many-body effect. These are often manifested as softened repulsion and hole in the PES and can lead to incorrect
22
- and sampling of the phase space.
23
 
24
- Here, we analyze the stability of the MD simulations under high pressure conditions by gradually increasing the pressure
25
- from 0 to 1000 GPa at 300K until the system crashes or completes 100 ps trajectory. This benchmark also explores faster the far-from-equilibrium
26
- dynamics of the system and the "durability" of the MLIPs under extreme conditions.
27
  """)
28
 
29
  st.markdown("### Methods")
30
  container = st.container(border=True)
31
- valid_models = [model for model, metadata in REGISTRY.items() if Path(__file__).stem in metadata.get("gpu-tasks", [])]
 
 
 
 
32
 
33
- models = container.multiselect("MLIPs", valid_models, ["MACE-MP(M)", "CHGNet", "ORB", "SevenNet"])
 
 
34
 
35
  st.markdown("### Settings")
36
  vis = st.container(border=True)
@@ -53,9 +55,9 @@ color_sequence = color_palettes[palette_name]
53
  if not models:
54
  st.stop()
55
 
 
56
  @st.cache_data
57
  def get_data(models):
58
-
59
  families = [REGISTRY[str(model)]["family"] for model in models]
60
 
61
  dfs = [
@@ -67,6 +69,7 @@ def get_data(models):
67
 
68
  return df
69
 
 
70
  df = get_data(models)
71
 
72
  method_color_mapping = {
@@ -83,10 +86,11 @@ max_steps = df["total_steps"].max()
83
  max_target_steps = df["target_steps"].max()
84
 
85
  bins = np.append(np.arange(0, max_steps + 1, max_steps // 10), max_target_steps)
86
- bin_labels = [f"{bins[i]}-{bins[i+1]}" for i in range(len(bins)-1)]
87
 
88
  num_bins = len(bin_labels)
89
- colormap = px.colors.sequential.Darkmint_r
 
90
  indices = np.linspace(0, len(colormap) - 1, num_bins, dtype=int)
91
  bin_colors = [colormap[i] for i in indices]
92
  # bin_colors[-1] = px.colors.sequential.Greens[-1]
@@ -102,32 +106,43 @@ for method, group in df.groupby("method"):
102
  counts_per_method[method] = counts
103
 
104
  # Sort the dictionary by the percentage of the last bin
105
- counts_per_method = {k: v for k, v in sorted(counts_per_method.items(), key=lambda item: item[1][-1]/sum(item[1]))}
 
 
 
 
 
106
 
107
 
108
  count_or_percetange = st.toggle("show counts", False)
109
 
 
110
  @st.experimental_fragment()
111
  def plot_md_steps(counts_per_method, count_or_percetange):
 
112
  # Create a figure
113
  fig = go.Figure()
114
 
115
  # Add a bar for each bin range across all methods
116
  for i, bin_label in enumerate(bin_labels):
117
  for method, counts in counts_per_method.items():
118
- fig.add_trace(go.Bar(
119
- # name=method, # This will be the legend entry
120
- x=[counts[i]/counts.sum()*100] if not count_or_percetange else [counts[i]],
121
- y=[method], # Method as the y-axis category
122
- # name=bin_label,
123
- orientation="h", # Horizontal bars
124
- marker=dict(
125
- color=bin_colors[i],
126
- line=dict(color="rgb(248, 248, 249)", width=1)
127
- ),
128
- text=f"{bin_label}: {counts[i]/counts.sum()*100:.0f}%",
129
- width=0.5
130
- ))
 
 
 
 
131
 
132
  # Update the layout to stack the bars
133
  fig.update_layout(
@@ -135,53 +150,38 @@ def plot_md_steps(counts_per_method, count_or_percetange):
135
  title="Total MD steps (before crash or completion)",
136
  xaxis_title="Percentage (%)" if not count_or_percetange else "Count",
137
  yaxis_title="Method",
138
- showlegend=False
139
  )
140
 
141
- # bins = np.linspace(0, 0.9, 10)
142
-
143
- # for method, data in df.groupby("method"):
144
-
145
- # # print(method, data)
146
- # counts, bins = np.histogram(data['total_steps'])
147
-
148
- # bin_labels = [f"{int(bins[i])}-{int(bins[i+1])}" for i in range(len(bins)-1)]
149
-
150
- # # Create a horizontal bar chart
151
- # fig = go.Figure(go.Bar(
152
- # x=[counts[i]], # Count for this bin
153
- # y=[method], # Method as the y-axis category
154
- # # x=counts, # Bar lengths
155
- # # y=bin_labels, # Bin labels as y-tick labels
156
- # orientation='h' # Horizontal bars
157
- # ))
158
-
159
-
160
- # # Update layout for clarity
161
- # fig.update_layout(
162
- # title="Histogram of Total Steps",
163
- # xaxis_title="Count",
164
- # yaxis_title="Total Steps Range"
165
- # )
166
-
167
  st.plotly_chart(fig)
168
 
169
 
170
  plot_md_steps(counts_per_method, count_or_percetange)
171
 
 
 
 
 
 
 
172
  ###
173
 
174
- # st.markdown("""
175
- # ## Runtime Analysis
 
 
 
 
 
176
 
177
- # """)
178
 
179
  def func(x, a, n):
180
  return a * x ** (-n)
181
 
 
182
  @st.experimental_fragment()
183
  def plot_speed(df, method_color_mapping):
184
-
185
  fig = px.scatter(
186
  df,
187
  x="natoms",
@@ -195,7 +195,7 @@ def plot_speed(df, method_color_mapping):
195
  log_x=True,
196
  # log_y=True,
197
  # range_y=[1, 1e2],
198
- range_x=[df["natoms"].min()*0.9, df["natoms"].max()*1.1],
199
  # range_x=[1e3, 1e2],
200
  title="Inference speed (on single A100 GPU)",
201
  labels={"steps_per_second": "Steps per second", "natoms": "Number of atoms"},
@@ -207,18 +207,26 @@ def plot_speed(df, method_color_mapping):
207
  data.dropna(subset=["steps_per_second"], inplace=True)
208
  popt, pcov = curve_fit(func, data["natoms"], data["steps_per_second"])
209
 
210
- fig.add_trace(go.Scatter(
211
- x=x,
212
- y=func(x, *popt),
213
- mode="lines",
214
- # name='Fit',
215
- line=dict(color=method_color_mapping[method], width=3),
216
- showlegend=False,
217
- name=f"{popt[0]:.2f}N^{-popt[1]:.2f}",
218
- hovertext=f"{popt[0]:.2f}N^{-popt[1]:.2f}",
219
- ))
 
 
220
 
221
  st.plotly_chart(fig)
222
 
223
 
224
  plot_speed(df, method_color_mapping)
 
 
 
 
 
 
 
17
  # High Pressure Stability
18
 
19
  Stable and accurate molecular dynamics (MD) simulations are important for understanding the properties of matter.
20
+ However, many MLIPs have unphysical potential energy surfaces (PES) at short-range interatomic distances or under many-body effects. These are often manifested as softened repulsion and holes in the PES and can lead to incorrect sampling of the phase space.
 
 
21
 
22
+ Here, we analyze the stability of the MD simulations under high pressure conditions by gradually increasing the pressure from 0 to 1000 GPa at 300 K until the system crashes or completes a 100 ps trajectory. This benchmark also more rapidly explores the far-from-equilibrium dynamics of the system and the "durability" of the MLIPs under extreme conditions.
 
 
23
  """)
24
 
25
  st.markdown("### Methods")
26
  container = st.container(border=True)
27
+ valid_models = [
28
+ model
29
+ for model, metadata in REGISTRY.items()
30
+ if Path(__file__).stem in metadata.get("gpu-tasks", [])
31
+ ]
32
 
33
+ models = container.multiselect(
34
+ "MLIPs", valid_models, ["MACE-MP(M)", "CHGNet", "ORB", "SevenNet"]
35
+ )
36
 
37
  st.markdown("### Settings")
38
  vis = st.container(border=True)
 
55
  if not models:
56
  st.stop()
57
 
58
+
59
  @st.cache_data
60
  def get_data(models):
 
61
  families = [REGISTRY[str(model)]["family"] for model in models]
62
 
63
  dfs = [
 
69
 
70
  return df
71
 
72
+
73
  df = get_data(models)
74
 
75
  method_color_mapping = {
 
86
  max_target_steps = df["target_steps"].max()
87
 
88
  bins = np.append(np.arange(0, max_steps + 1, max_steps // 10), max_target_steps)
89
+ bin_labels = [f"{bins[i]}-{bins[i+1]}" for i in range(len(bins) - 1)]
90
 
91
  num_bins = len(bin_labels)
92
+ # colormap = px.colors.sequential.Darkmint_r
93
+ colormap = px.colors.sequential.YlOrRd_r
94
  indices = np.linspace(0, len(colormap) - 1, num_bins, dtype=int)
95
  bin_colors = [colormap[i] for i in indices]
96
  # bin_colors[-1] = px.colors.sequential.Greens[-1]
 
106
  counts_per_method[method] = counts
107
 
108
  # Sort the dictionary by the percentage of the last bin
109
+ counts_per_method = {
110
+ k: v
111
+ for k, v in sorted(
112
+ counts_per_method.items(), key=lambda item: item[1][-1] / sum(item[1])
113
+ )
114
+ }
115
 
116
 
117
  count_or_percetange = st.toggle("show counts", False)
118
 
119
+
120
  @st.experimental_fragment()
121
  def plot_md_steps(counts_per_method, count_or_percetange):
122
+ """Plot the distribution of the total number of MD steps before crash or completion."""
123
  # Create a figure
124
  fig = go.Figure()
125
 
126
  # Add a bar for each bin range across all methods
127
  for i, bin_label in enumerate(bin_labels):
128
  for method, counts in counts_per_method.items():
129
+ fig.add_trace(
130
+ go.Bar(
131
+ # name=method, # This will be the legend entry
132
+ x=[counts[i] / counts.sum() * 100]
133
+ if not count_or_percetange
134
+ else [counts[i]],
135
+ y=[method], # Method as the y-axis category
136
+ # name=bin_label,
137
+ orientation="h", # Horizontal bars
138
+ marker=dict(
139
+ color=bin_colors[i],
140
+ line=dict(color="rgb(248, 248, 249)", width=1),
141
+ ),
142
+ text=f"{bin_label}: {counts[i]/counts.sum()*100:.0f}%",
143
+ width=0.5,
144
+ )
145
+ )
146
 
147
  # Update the layout to stack the bars
148
  fig.update_layout(
 
150
  title="Total MD steps (before crash or completion)",
151
  xaxis_title="Percentage (%)" if not count_or_percetange else "Count",
152
  yaxis_title="Method",
153
+ showlegend=False,
154
  )
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  st.plotly_chart(fig)
157
 
158
 
159
  plot_md_steps(counts_per_method, count_or_percetange)
160
 
161
+ st.markdown(
162
+ """
163
+ > The histogram shows the distribution of the total number of MD steps before the system crashes or completes the trajectory. :red[The color of the bins indicates the number of steps in the bin]. :blue[The length of the bars indicates the percentage (or the count, when "show counts" is enabled) of each bin among all the runs].
164
+ """
165
+ )
166
+
167
  ###
168
 
169
+ st.markdown(
170
+ """
171
+ ## Inference speed
172
+
173
+ The inference speed of the MLIPs is crucial for high-throughput virtual screening. Under high pressure conditions, the atoms often move faster and closer to each other, which increases the size of the neighbor list and the cost of local graph construction and hence slows down the inference speed.
174
+ """
175
+ )
176
 
 
177
 
178
  def func(x, a, n):
179
  return a * x ** (-n)
180
 
181
+
182
  @st.experimental_fragment()
183
  def plot_speed(df, method_color_mapping):
184
+ """Plot the inference speed as a function of the number of atoms."""
185
  fig = px.scatter(
186
  df,
187
  x="natoms",
 
195
  log_x=True,
196
  # log_y=True,
197
  # range_y=[1, 1e2],
198
+ range_x=[df["natoms"].min() * 0.9, df["natoms"].max() * 1.1],
199
  # range_x=[1e3, 1e2],
200
  title="Inference speed (on single A100 GPU)",
201
  labels={"steps_per_second": "Steps per second", "natoms": "Number of atoms"},
 
207
  data.dropna(subset=["steps_per_second"], inplace=True)
208
  popt, pcov = curve_fit(func, data["natoms"], data["steps_per_second"])
209
 
210
+ fig.add_trace(
211
+ go.Scatter(
212
+ x=x,
213
+ y=func(x, *popt),
214
+ mode="lines",
215
+ # name='Fit',
216
+ line=dict(color=method_color_mapping[method], width=3),
217
+ showlegend=False,
218
+ name=f"{popt[0]:.2f}N^{-popt[1]:.2f}",
219
+ hovertext=f"{popt[0]:.2f}N^{-popt[1]:.2f}",
220
+ )
221
+ )
222
 
223
  st.plotly_chart(fig)
224
 
225
 
226
  plot_speed(df, method_color_mapping)
227
+
228
+ st.markdown(
229
+ """
230
+ > The plot shows the inference speed (steps per second) as a function of the number of atoms in the system. :red[The size of the points is proportional to the total number of steps in the MD trajectory before crash or completion (~49990)]. :blue[The lines show the fit of the data to the power law function $a N^{-n}$], where $N$ is the number of atoms and $a$ and $n$ are the fit parameters.
231
+ """
232
+ )