matsammut committed on
Commit
0bf6315
·
verified ·
1 Parent(s): 8f30954

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -59,6 +59,8 @@ def predict_hb(age, workclass, education, occupation, race, gender, capital_ga
59
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
60
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
61
  # "hours-per-week":[hours_per_week], "native-country":[native_country]}
 
 
62
  columns = {
63
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
64
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -70,10 +72,19 @@ def predict_hb(age, workclass, education, occupation, race, gender, capital_ga
70
  # ann_model = pickle.load(ann_model_file)
71
  scaler = StandardScaler()
72
  X = scaler.fit_transform(fixed_features)
73
- hb_model = joblib.load('hdbscan_model.joblib')
74
- prediction = hdbscan.approximate_predict(hb_model,fixed_features)
75
- # prediction = 1
76
- return f"Predicted Cluster (HDBSCAN): {prediction}"
 
 
 
 
 
 
 
 
 
77
 
78
 
79
  def cleaning_features(data,race,hdbscan):
@@ -137,11 +148,15 @@ def cleaning_features(data,race,hdbscan):
137
 
138
  data = pca(data)
139
  if(hdbscan):
 
 
 
140
  data['capital-gain'] = np.log1p(data['capital-gain'])
141
  data['capital-loss'] = np.log1p(data['capital-loss'])
142
  scaler = joblib.load("robust_scaler.pkl")
143
  numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
144
  data[numerical_features] = scaler.transform(data[numerical_features])
 
145
  return data
146
 
147
  # def pca(data):
 
59
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
60
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
61
  # "hours-per-week":[hours_per_week], "native-country":[native_country]}
62
+
63
+
64
  columns = {
65
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
66
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
72
  # ann_model = pickle.load(ann_model_file)
73
  scaler = StandardScaler()
74
  X = scaler.fit_transform(fixed_features)
75
+
76
+ clusterer = hdbscan.HDBSCAN(
77
+ min_cluster_size=220,
78
+ min_samples=117,
79
+ metric='euclidean',
80
+ cluster_selection_method='eom',
81
+ prediction_data=True,
82
+ cluster_selection_epsilon=0.28479667859306007
83
+ )
84
+
85
+ prediction = clusterer.fit_predict(X)
86
+
87
+ return f"Predicted Cluster (HDBSCAN): {prediction[-1]}"
88
 
89
 
90
  def cleaning_features(data,race,hdbscan):
 
148
 
149
  data = pca(data)
150
  if(hdbscan):
151
+ df_transformed = pd.read_csv('dataset.csv')
152
+ X = df_transformed.drop('income', axis=1)
153
+ data = pd.concat([X, data], ignore_index=True)
154
  data['capital-gain'] = np.log1p(data['capital-gain'])
155
  data['capital-loss'] = np.log1p(data['capital-loss'])
156
  scaler = joblib.load("robust_scaler.pkl")
157
  numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
158
  data[numerical_features] = scaler.transform(data[numerical_features])
159
+
160
  return data
161
 
162
  # def pca(data):