MotoPanda committed on
Commit
7567aee
·
verified ·
1 Parent(s): b797b5c

Create heuristic.py

Browse files
Files changed (1) hide show
  1. heuristic.py +60 -0
heuristic.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+
6
class HeuristicRegressor:
    """Predict a target as the median of the training bucket a record falls in.

    Columns named in ``buckets`` are binned with ``pandas.cut``. Training rows
    are grouped by those bins together with ``cat_cols`` and the median of
    ``target`` is stored per group. ``predict`` looks up the group matching a
    record and returns its stored median, falling back to the median of the
    whole training target when no group can be found.
    """

    def __init__(self, buckets: dict, cat_cols: list, target: str):
        """Store the model configuration.

        Args:
            buckets: Maps a column name to the ``bins`` argument handed to
                ``pandas.cut`` for that column.
            cat_cols: Column names matched by equality at prediction time.
            target: Name of the column whose median is predicted.
        """
        self.buckets = buckets
        self.target = target
        self.cat_cols = cat_cols

    def fit(self, data):
        """Compute per-group medians of the target from ``data``.

        Sets ``self.target_medians`` (one row per group, with
        ``<col>_bucket_left`` / ``<col>_bucket_right`` edge columns) and
        ``self.unconditional_median``. ``data`` itself is left unmodified.

        Args:
            data: DataFrame containing the bucket columns, the categorical
                columns and the target column.
        """
        bucketed_cols = [f"{bucket_col}_bucket" for bucket_col in self.buckets]

        # Bin on a derived frame so the caller's DataFrame does not silently
        # gain "<col>_bucket" columns.
        binned = data.assign(**{
            f"{bucket_col}_bucket": pd.cut(data[bucket_col], bins)
            for bucket_col, bins in self.buckets.items()
        })

        # observed=False is pinned explicitly: it keeps bins that received no
        # training rows (their median is NaN and is filled just below),
        # independent of the pandas version's default.
        self.target_medians = (
            binned.groupby(bucketed_cols + self.cat_cols, observed=False)
            .agg({self.target: "median"})
            .reset_index()
        )

        self.unconditional_median = data[self.target].median()
        self.target_medians[self.target] = self.target_medians[self.target].fillna(
            value=self.unconditional_median
        )

        # Replace each Interval column by plain float edges so the table can
        # be compared numerically in predict() and serialized in to_json().
        for bucketed_col in bucketed_cols:
            intervals = self.target_medians[bucketed_col]
            self.target_medians[f"{bucketed_col}_left"] = (
                intervals.apply(lambda interval: interval.left).astype(float)
            )
            self.target_medians[f"{bucketed_col}_right"] = (
                intervals.apply(lambda interval: interval.right).astype(float)
            )
        self.target_medians = self.target_medians.drop(columns=bucketed_cols)

    def predict(self, value_dict: pd.DataFrame):
        """Return the stored median of the group matching ``value_dict``.

        Args:
            value_dict: A single record indexable by column name; it is read
                via ``value_dict[col]`` for every bucket and categorical
                column. NOTE(review): despite the annotation, the lookup logic
                expects one scalar per column (e.g. a dict or a row Series) --
                confirm against callers.

        Returns:
            The target median of the first matching group, or
            ``self.unconditional_median`` when nothing matches or the lookup
            fails (missing key, incomparable value, ...).
        """
        try:
            medians = self.target_medians
            mask = pd.Series(True, index=medians.index)

            # Bins are matched as right-closed intervals: left < value <= right.
            for bucket_col in self.buckets:
                value_for_col = value_dict[bucket_col]
                mask &= (
                    (medians[f"{bucket_col}_bucket_left"] < value_for_col)
                    & (medians[f"{bucket_col}_bucket_right"] >= value_for_col)
                )

            for cat_col in self.cat_cols:
                mask &= medians[cat_col] == value_dict[cat_col]

            return medians.loc[mask, self.target].values[0]
        except Exception:
            # Deliberate best-effort fallback; unlike a bare ``except`` this
            # no longer swallows KeyboardInterrupt / SystemExit.
            return self.unconditional_median

    def to_json(self):
        """Serialize the configuration and the fitted table to a JSON string."""
        # NumPy arrays of bin edges are not JSON serializable; store them as
        # plain lists, which pandas.cut accepts equally.
        buckets = {
            bucket_col: bins.tolist() if isinstance(bins, np.ndarray) else bins
            for bucket_col, bins in self.buckets.items()
        }
        target_medians_dict = self.target_medians.to_dict()
        return json.dumps({"buckets": buckets,
                           "target": self.target,
                           "cat_cols": self.cat_cols,
                           "unconditional_median": self.unconditional_median,
                           "target_medians_dict": target_medians_dict})

    @classmethod
    def from_json(cls, json_):
        """Rebuild a fitted model from a string produced by ``to_json``."""
        json_package = json.loads(json_)
        model = cls(
            buckets=json_package["buckets"],
            cat_cols=json_package["cat_cols"],
            target=json_package["target"],
        )
        model.target_medians = pd.DataFrame.from_dict(json_package["target_medians_dict"])
        model.unconditional_median = json_package["unconditional_median"]
        return model