cyrusyc committed on
Commit
b3722a8
·
1 Parent(s): 66b89f4

add jobs tasks

Browse files
README.md CHANGED
@@ -23,7 +23,11 @@ If you have pretrained MLIP models that you would like to contribute to the MLIP
23
  2. Follow the template to code the I/O interface for your model, and upload the script along with metadata to the MLIP Arena [here]().
24
  3. CPU benchmarking will be performed automatically. Due to the limited amount of GPU compute, if you would like to be considered for GPU benchmarking, please create a pull request to demonstrate the offline performance of your model (published paper or preprint). We will review and select the models to be benchmarked on GPU.
25
 
26
- ### Add new benchmarks
 
 
 
 
27
 
28
  #### Molecular dynamics calculations
29
 
 
23
  2. Follow the template to code the I/O interface for your model, and upload the script along with metadata to the MLIP Arena [here]().
24
  3. CPU benchmarking will be performed automatically. Due to the limited amount of GPU compute, if you would like to be considered for GPU benchmarking, please create a pull request to demonstrate the offline performance of your model (published paper or preprint). We will review and select the models to be benchmarked on GPU.
25
 
26
+ ### Add new benchmark tasks
27
+
28
+ 1. Create a new [Hugging Face Dataset](https://huggingface.co/new-dataset) repository and upload the reference data (e.g. DFT, AIMD, experimental measurements such as RDF).
29
+ 2. Follow the task template to implement the task class and upload the script along with metadata to the MLIP Arena [here]().
30
+ 3. Code a benchmark script to evaluate the performance of your model on the task. The script should be able to load the model and the dataset, and output the evaluation metrics.
31
 
32
  #### Molecular dynamics calculations
33
 
mlip_arena/jobs/__init__.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import enum
3
+
4
+ from mlip_arena.models import MLIP
5
+ from mlip_arena.tasks import Task
6
+
7
+
8
class Machine(enum.Enum):
    """Compute targets a benchmark job can be dispatched to.

    Each member's value is the human-readable label of that machine,
    suitable for log messages and display.
    """

    # Hugging Face hosted CPU hardware.
    HFCPU = "Hugging Face CPU Basic"

    # NERSC Perlmutter: CPU nodes, then the two A100 memory variants.
    PERLCPU = "NERSC Perlmutter CPU"
    PERLA100 = "NERSC Perlmutter A100 40GB"
    PERLA100L = "NERSC Perlmutter A100 80GB"
14
+
15
class Job:
    """A benchmark job: one model evaluated on one task on one machine.

    Args:
        model: The MLIP calculator to evaluate; stored as ``self.calculator``.
        task: The benchmark task to run; its ``name`` is used for display.
        machine: The ``Machine`` member the job is dispatched to.
        **kwargs: Extra options, stored unchanged in ``self.kwargs``.
    """

    def __init__(self, model: MLIP, task: Task, machine: Machine, **kwargs):
        self.calculator = model
        self.task = task
        self.machine = machine
        self.kwargs = kwargs

    def __str__(self):
        return f"Job: {self.task.name} on {self.machine.value}"

    def run(self):
        """Dispatch the job to the backend selected by ``self.machine``.

        Raises:
            NotImplementedError: Always, for now. No backend is implemented
                yet, including machines that have no dedicated branch.
        """
        # ``Job`` defines no ``name`` attribute; the display name lives on
        # the task, exactly as ``__str__`` uses it.
        label = self.task.name

        if self.machine == Machine.HFCPU:
            print(f"Running {label} on {self.machine.value}")
            # TODO: run the task on Hugging Face CPU Basic
            raise NotImplementedError
        elif self.machine == Machine.PERLCPU:
            print(f"Running {label} on {self.machine.value}")
            # TODO: send the task to NERSC Perlmutter CPU node and listen
            # for the results
            raise NotImplementedError
        elif self.machine == Machine.PERLA100:
            print(f"Running {label} on {self.machine.value}")
            # TODO: send the task to NERSC Perlmutter GPU node and listen
            # for the results
            raise NotImplementedError
        else:
            # Fail loudly instead of silently returning None for machines
            # without a branch (e.g. Machine.PERLA100L).
            raise NotImplementedError(
                f"No runner implemented for {self.machine.value}"
            )
38
+
mlip_arena/jobs/run.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+
3
+ from mlip_arena.models import REGISTRY as MODEL_REGISTRY
4
+ from mlip_arena.tasks import REGISTRY as TASK_REGISTRY
5
+
6
+ print(MODEL_REGISTRY)
7
+ print(TASK_REGISTRY)
8
+
9
+ for task, metadata in TASK_REGISTRY.items():
10
+
11
+ print(f"mlip_arena.tasks.{task}")
12
+ module = importlib.import_module(f"mlip_arena.tasks.{task}")
13
+ module.whoami()
mlip_arena/models/README.md CHANGED
@@ -6,4 +6,4 @@
6
  2. Add the classes and their supported tasks to the model registry file `registry.yaml`.
7
  3. Run tests on HF Space to ensure the model is working as expected.
8
  4. [Push files to the Hub](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
9
- 5. Use [HF webhook](https://huggingface.co/docs/hub/en/webhooks) to run tasks and visualize the results on leaderboard. [[guide]](https://huggingface.co/docs/hub/en/webhooks-guide-metadata-review)
 
6
  2. Add the classes and their supported tasks to the model registry file `registry.yaml`.
7
  3. Run tests on HF Space to ensure the model is working as expected.
8
  4. [Push files to the Hub](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
9
+ 5. Use [HF webhook](https://huggingface.co/docs/hub/en/webhooks) to check the status of benchmark tasks (pass, fail, null), run unfinished tasks and visualize the results on leaderboard. [[guide]](https://huggingface.co/docs/hub/en/webhooks-guide-metadata-review)
mlip_arena/models/__init__.py CHANGED
@@ -1,19 +1,24 @@
 
 
1
  import torch
 
2
  from ase import Atoms
3
  from ase.calculators.calculator import Calculator, all_changes
4
  from torch import nn
5
  from torch_geometric.data import Data
6
 
7
- REGISTRY_FILE = 'registry.yaml'
8
-
9
 
10
  class MLIP(Calculator):
11
- def __init__(self):
 
 
12
  super().__init__()
13
- self.name: str = "MLIP"
14
  self.version: str = None
15
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
- self.model: nn.Module = None
17
  self.implemented_properties = ["energy", "forces"]
18
 
19
  def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):
 
1
+ import os
2
+
3
  import torch
4
+ import yaml
5
  from ase import Atoms
6
  from ase.calculators.calculator import Calculator, all_changes
7
  from torch import nn
8
  from torch_geometric.data import Data
9
 
10
+ with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
11
+ REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
12
 
13
  class MLIP(Calculator):
14
+ def __init__(self,
15
+ model_path: str = None,
16
+ device: torch.device = None):
17
  super().__init__()
18
+ self.name: str = self.__class__.__name__
19
  self.version: str = None
20
+ self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
+ self.model: nn.Module = torch.load(model_path, map_location=self.device)
22
  self.implemented_properties = ["energy", "forces"]
23
 
24
  def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):
mlip_arena/models/mace-mp.py CHANGED
@@ -8,15 +8,17 @@ from mlip_arena.models import MLIP
8
 
9
 
10
  class MACE_MP_Medium(MLIP):
11
- def __init__(self):
12
- super().__init__()
 
 
 
 
 
 
 
13
  self.name = "MACE-MP-0 (medium)"
14
  self.version = "1.0.0"
15
-
16
- fpath = hf_hub_download(repo_id="cyrusyc/mace-universal", subfolder="pretrained", filename="2023-12-12-mace-128-L1_epoch-199.model")
17
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
- self.model = torch.load(fpath, map_location="cpu")
19
- self.model.to(self.device)
20
  self.implemented_properties = [
21
  "energy",
22
  "forces",
 
8
 
9
 
10
  class MACE_MP_Medium(MLIP):
11
+ def __init__(self, device: torch.device = None):
12
+ fpath = hf_hub_download(
13
+ repo_id="cyrusyc/mace-universal",
14
+ subfolder="pretrained",
15
+ filename="2023-12-12-mace-128-L1_epoch-199.model",
16
+ revision=None # TODO: Add revision
17
+ )
18
+ super().__init__(model_path=fpath, device=device)
19
+
20
  self.name = "MACE-MP-0 (medium)"
21
  self.version = "1.0.0"
 
 
 
 
 
22
  self.implemented_properties = [
23
  "energy",
24
  "forces",
mlip_arena/models/registry.yaml CHANGED
@@ -4,6 +4,8 @@ MACE_MP_Medium:
4
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
5
  datasets: # list of training datasets
6
  - atomind/mptrj # TODO: fake HF dataset repo
7
- tasks:
8
  - alexandria
9
- - qmof
 
 
 
4
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
5
  datasets: # list of training datasets
6
  - atomind/mptrj # TODO: fake HF dataset repo
7
+ cpu-tasks:
8
  - alexandria
9
+ - qmof
10
+ gpu-tasks:
11
+
mlip_arena/tasks/README.md CHANGED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Note on task registration
3
+
4
+ 1. Use `ast` to parse task classes from the uploaded script.
5
+ 2. Add the classes and their supported tasks to the task registry file `registry.yaml`.
6
+ 3. Run tests on HF Space to ensure the task is working as expected.
7
+ 4. [Push task script to the Space](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
8
+ 5. Create task folder in [mlip-arena](https://huggingface.co/datasets/atomind/mlip-arena) HF Dataset.
9
+ 6.
mlip_arena/tasks/__init__.py CHANGED
@@ -1,3 +1,44 @@
 
1
 
 
 
2
 
3
- REGISTRY_FILE = 'registry.yaml'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
 
3
+ import yaml
4
+ from huggingface_hub import HfApi, HfFileSystem, hf_hub_download
5
 
6
+ from mlip_arena.models import MLIP
7
+ from mlip_arena.models import REGISTRY as MODEL_REGISTRY
8
+
9
+ with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
10
+ REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
11
+
12
+
13
class Task:
    """Base class for benchmark tasks.

    ``name`` defaults to the concrete class name; subclasses are expected
    to override ``run``.
    """

    def __init__(self):
        self.name: str = self.__class__.__name__  # display name on the leaderboard

    def run(self, model: MLIP):
        """Run the task using the given model and return the results.

        Raises:
            NotImplementedError: Always; the base class has no implementation.
        """
        raise NotImplementedError

    def _upload_results(self, model: MLIP, path_or_fileobj="results.json"):
        """Upload a results file to the ``atomind/mlip-arena`` dataset repo.

        This logic previously sat after the unconditional ``raise`` in
        ``run`` and could never execute. The file is stored under
        ``<task class name>/<model class name>/results.json``.

        Args:
            model: The model whose class name selects the destination folder.
            path_or_fileobj: Local file (or file object) to upload.
        """
        # Calculate evaluation metrics and postprocessed data before calling this.
        api = HfApi()
        api.upload_file(
            path_or_fileobj=path_or_fileobj,
            path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json",  # Upload to a specific folder
            repo_id="atomind/mlip-arena",
            repo_type="dataset",
        )

    def get_results(self):
        """Get the results from the task.

        Downloads ``results.json`` for every model key in ``MODEL_REGISTRY``.

        Returns:
            The value returned by ``hf_hub_download`` for the last registry
            entry, or ``None`` when the registry has no entries.
        """
        # NOTE(review): only the last download is returned, as before;
        # presumably a per-model collection was intended -- confirm with
        # callers before changing the return shape.

        # fs = HfFileSystem()
        # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")

        results = None  # avoids UnboundLocalError on an empty registry
        for model in MODEL_REGISTRY:
            results = hf_hub_download(
                repo_id="atomind/mlip-arena",
                filename="results.json",
                subfolder=f"{self.__class__.__name__}/{model}",
                repo_type="dataset",
                revision=None,
            )

        return results
mlip_arena/tasks/alexandria.py CHANGED
@@ -1,3 +1,7 @@
1
 
2
 
3
  URL = "https://alexandria.icams.rub.de/"
 
 
 
 
 
1
 
2
 
3
  URL = "https://alexandria.icams.rub.de/"
4
+
5
+
6
+ def whoami():
7
+ print(f'TEST: {__file__}')
mlip_arena/tasks/nacl.py CHANGED
@@ -1,7 +1,11 @@
1
 
2
- import yaml
3
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 
4
 
5
- from mlip_arena.models import REGISTRY_FILE
6
 
7
- yaml.load(open(REGISTRY_FILE), Loader=yaml.FullLoader)
 
 
 
 
 
 
1
 
 
2
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
3
+ from mlip_arena.models import MLIP
4
 
 
5
 
6
+ def whoami():
7
+ print(f'TEST: {__file__}')
8
+
9
+
10
+ if __name__ == "__main__":
11
+
mlip_arena/tasks/qmof.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+
2
+
3
+ def whoami():
4
+ print(f'TEST: {__file__}')