msiron committed on
Commit
193b388
·
1 Parent(s): aba1804

Filter with the datasets library rather than loading the full dataset into pandas memory

Browse files
Files changed (1) hide show
  1. app.py +21 -11
app.py CHANGED
@@ -15,7 +15,11 @@ from pymatgen.entries.computed_entries import (
15
 
16
  HF_TOKEN = os.environ.get("HF_TOKEN")
17
 
18
- subsets = ["compatible_pbe", "compatible_pbesol", "compatible_scan",]
 
 
 
 
19
 
20
  # Load only the train split of the dataset
21
 
@@ -39,11 +43,11 @@ for subset in subsets:
39
  datasets.append(dataset["train"])
40
 
41
  # Convert the train split to a pandas DataFrame
42
- df = pd.concat([x.to_pandas() for x in datasets])
43
- train_df = dataset.to_pandas()
44
- del dataset
45
-
46
 
 
47
 
48
 
49
  def create_phase_diagram(
@@ -60,17 +64,23 @@ def create_phase_diagram(
60
 
61
  # Filter entries based on functional
62
  if functional == "PBE":
63
- entries_df = train_df[train_df["functional"] == "pbe"]
 
64
  elif functional == "PBESol":
65
- entries_df = train_df[train_df["functional"] == "pbesol"]
 
66
  elif functional == "SCAN":
67
- entries_df = train_df[train_df["functional"] == "scan"]
 
68
 
69
  isubset = lambda x: set(x).issubset(element_list)
70
  isintersection = lambda x: len(set(x).intersection(element_list)) > 0
71
- entries_df = entries_df[
72
- [isintersection(l) and isubset(l) for l in entries_df.elements.values.tolist()]
73
- ]
 
 
 
74
 
75
  # Fetch all entries from the Materials Project database
76
  entries = [
 
15
 
16
  HF_TOKEN = os.environ.get("HF_TOKEN")
17
 
18
+ subsets = [
19
+ "compatible_pbe",
20
+ "compatible_pbesol",
21
+ "compatible_scan",
22
+ ]
23
 
24
  # Load only the train split of the dataset
25
 
 
43
  datasets.append(dataset["train"])
44
 
45
  # Concatenate the train splits into a single dataset (the earlier pandas conversion is kept commented out below)
46
+ # df = pd.concat([x.to_pandas() for x in datasets])
47
+ # train_df = dataset.to_pandas()
48
+ # del dataset
 
49
 
50
+ dataset = concatenate_datasets(datasets)
51
 
52
 
53
  def create_phase_diagram(
 
64
 
65
  # Filter entries based on functional
66
  if functional == "PBE":
67
+ ds_filter = dataset.filter(lambda example: example["functional"] == "pbe")
68
+ # entries_df = train_df[train_df["functional"] == "pbe"]
69
  elif functional == "PBESol":
70
+ ds_filter = dataset.filter(lambda example: example["functional"] == "pbesol")
71
+ # entries_df = train_df[train_df["functional"] == "pbesol"]
72
  elif functional == "SCAN":
73
+ ds_filter = dataset.filter(lambda example: example["functional"] == "scan")
74
+ # entries_df = train_df[train_df["functional"] == "scan"]
75
 
76
  isubset = lambda x: set(x).issubset(element_list)
77
  isintersection = lambda x: len(set(x).intersection(element_list)) > 0
78
+ ds_filter = ds_filter.filter(
79
+ lambda example: isintersection(example["functional"])
80
+ and isubset(example["functional"])
81
+ )
82
+
83
+ entries_df = ds_filter.to_pandas()
84
 
85
  # Fetch all entries from the Materials Project database
86
  entries = [