msiron committed on
Commit
9ec01d1
·
1 Parent(s): 2e61c5c

pandas switch

Browse files
Files changed (1) hide show
  1. app.py +47 -48
app.py CHANGED
@@ -22,44 +22,42 @@ subsets = [
22
  "compatible_scan",
23
  ]
24
 
25
- polars_dfs = {
26
- subset: pl.read_parquet(
27
- "hf://datasets/LeMaterial/LeMat1/{}/train-*.parquet".format(subset),
28
- storage_options={
29
- "token": HF_TOKEN,
30
- },
31
- )
32
- for subset in subsets
33
- }
34
-
35
- # Load only the train split of the dataset
36
-
37
- # datasets = []
38
- # for subset in subsets:
39
- # dataset = load_dataset(
40
- # "LeMaterial/leMat-Bulk",
41
- # subset,
42
- # token=HF_TOKEN,
43
- # columns=[
44
- # "lattice_vectors",
45
- # "species_at_sites",
46
- # "cartesian_site_positions",
47
- # "energy",
48
- # "energy_corrected",
49
- # "immutable_id",
50
- # "elements",
51
- # "functional",
52
- # ],
53
  # )
54
- # datasets.append(dataset["train"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Convert the train split to a pandas DataFrame
57
  # df = pd.concat([x.to_pandas() for x in datasets])
58
  # train_df = dataset.to_pandas()
59
  # del dataset
60
 
61
- dataset = concatenate_datasets(datasets)
62
-
63
  # dataset_element_combination_dict = {}
64
 
65
  # isubset = lambda x: set(x).issubset(element_list)
@@ -90,33 +88,34 @@ def create_phase_diagram(
90
 
91
  # Filter entries based on functional
92
  if functional == "PBE":
93
- df = polars_dfs["compatible_pbe"].clone()
94
  # entries_df = train_df[train_df["functional"] == "pbe"]
95
  elif functional == "PBESol":
96
- df = polars_dfs["compatible_pbesol"].clone()
97
  # entries_df = train_df[train_df["functional"] == "pbesol"]
98
  elif functional == "SCAN":
99
- df = polars_dfs["compatible_scan"].clone()
100
  # entries_df = train_df[train_df["functional"] == "scan"]
101
 
102
  # entries_df = df.to_pandas()
103
 
104
- # isubset = lambda x: set(x).issubset(element_list)
105
- # isintersection = lambda x: len(set(x).intersection(element_list)) > 0
106
- # entries_df = entries_df[entries_df["elements"]](
107
- # lambda example: isintersection(example["elements"])
108
- # and isubset(example["elements"])
109
- # )
110
 
111
- df = df.filter((df.col("elements").list.contains(x) for x in element_list))
112
- df = df.filter(
113
- pl.col("elements")
114
- .list.eval(pl.element().is_in(element_list))
115
- .list.any()
116
- .alias("check")
117
- )
 
 
 
 
 
 
118
 
119
- entries_df = df.to_pandas()
120
 
121
  # Fetch all entries from the Materials Project database
122
  entries = [
 
22
  "compatible_scan",
23
  ]
24
 
25
+ # polars_dfs = {
26
+ # subset: pl.read_parquet(
27
+ # "hf://datasets/LeMaterial/LeMat1/{}/train-*.parquet".format(subset),
28
+ # storage_options={
29
+ # "token": HF_TOKEN,
30
+ # },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # )
32
+ # for subset in subsets
33
+ # }
34
+
35
+ # # Load only the train split of the dataset
36
+
37
+ subsets_ds = {}
38
+ for subset in subsets:
39
+ dataset = load_dataset(
40
+ "LeMaterial/leMat-Bulk",
41
+ subset,
42
+ token=HF_TOKEN,
43
+ columns=[
44
+ "lattice_vectors",
45
+ "species_at_sites",
46
+ "cartesian_site_positions",
47
+ "energy",
48
+ "energy_corrected",
49
+ "immutable_id",
50
+ "elements",
51
+ "functional",
52
+ ],
53
+ )
54
+ subsets_ds[subset] = dataset["train"]
55
 
56
  # Convert the train split to a pandas DataFrame
57
  # df = pd.concat([x.to_pandas() for x in datasets])
58
  # train_df = dataset.to_pandas()
59
  # del dataset
60
 
 
 
61
  # dataset_element_combination_dict = {}
62
 
63
  # isubset = lambda x: set(x).issubset(element_list)
 
88
 
89
  # Filter entries based on functional
90
  if functional == "PBE":
91
+ entries_df = subsets_ds["compatible_pbe"].to_pandas()
92
  # entries_df = train_df[train_df["functional"] == "pbe"]
93
  elif functional == "PBESol":
94
+ entries_df = subsets_ds["compatible_pbesol"].to_pandas()
95
  # entries_df = train_df[train_df["functional"] == "pbesol"]
96
  elif functional == "SCAN":
97
+ entries_df = subsets_ds["compatible_scan"].to_pandas()
98
  # entries_df = train_df[train_df["functional"] == "scan"]
99
 
100
  # entries_df = df.to_pandas()
101
 
102
+ entries_df = entries_df[~entries_df['immutable_id'].isna()]
 
 
 
 
 
103
 
104
+ isubset = lambda x: set(x).issubset(element_list)
105
+ isintersection = lambda x: len(set(x).intersection(element_list)) > 0
106
+ entries_df = entries_df[
107
+ [isintersection(l) and isubset(l) for l in entries_df.elements.values.tolist()]
108
+ ]
109
+
110
+ # df = df.filter((df.col("elements").list.contains(x) for x in element_list))
111
+ # df = df.filter(
112
+ # pl.col("elements")
113
+ # .list.eval(pl.element().is_in(element_list))
114
+ # .list.any()
115
+ # .alias("check")
116
+ # )
117
 
118
+ # entries_df = df.to_pandas()
119
 
120
  # Fetch all entries from the Materials Project database
121
  entries = [