matsammut committed on
Commit
8af6ce4
·
verified ·
1 Parent(s): 1bb20ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import numpy as np
5
  from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
6
  from sklearn.impute import KNNImputer
 
7
 
8
  # Load your saved model
9
  # model = joblib.load("ann_model.joblib")
@@ -42,10 +43,21 @@ def cleaning_features(data):
42
  # Binarize native country
43
  data['native-country'] = data['native-country'].apply(lambda x: x == 'United-States')
44
  data['native-country'] = data['native-country'].astype(int)
 
 
45
 
46
- print(data.head(10))
 
 
 
 
 
 
 
 
 
 
47
 
48
- return data, encoder, scaler
49
 
50
  # Create the Gradio interface
51
  interface = gr.Interface(
 
4
  import numpy as np
5
  from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
6
  from sklearn.impute import KNNImputer
7
+ from sklearn.decomposition import PCA
8
 
9
  # Load your saved model
10
  # model = joblib.load("ann_model.joblib")
 
43
  # Binarize native country
44
  data['native-country'] = data['native-country'].apply(lambda x: x == 'United-States')
45
  data['native-country'] = data['native-country'].astype(int)
46
+ data = pca(data)
47
+ return data
48
 
49
def pca(data, n_components=10, columns=('workclass', 'occupation')):
    """Replace categorical columns with PCA components of their one-hot encoding.

    The selected columns are one-hot encoded, the encoded matrix is projected
    with PCA, the original columns are dropped, and the projected components
    are appended as ``pca_component_1`` ... ``pca_component_k``.

    Args:
        data: DataFrame containing every column named in ``columns``.
        n_components: Value forwarded to ``PCA(n_components=...)``.
            Defaults to 10, the value previously hard-coded here.
        columns: Names of the categorical columns to encode and replace.
            Defaults to the previously hard-coded ``workclass`` and
            ``occupation``.

    Returns:
        A new DataFrame with ``columns`` removed and the PCA component
        columns appended, row-aligned with the input.

    NOTE(review): the encoder and the PCA are both re-fitted on ``data`` at
    every call, so component values are only comparable within one call.
    Confirm this is intended if a previously trained model consumes the
    output.
    """
    columns = list(columns)

    encoder = OneHotEncoder(sparse_output=False)
    one_hot_encoded = encoder.fit_transform(data[columns])
    encoded_columns_df = pd.DataFrame(
        one_hot_encoded, columns=encoder.get_feature_names_out()
    )

    pca_net = PCA(n_components=n_components)
    pca_result_net = pca_net.fit_transform(encoded_columns_df)

    # Name the columns from the actual projection width so the labels always
    # match what PCA returned, whatever form ``n_components`` took.
    pca_columns = [
        f'pca_component_{i + 1}' for i in range(pca_result_net.shape[1])
    ]

    # Reuse the caller's index: ``pd.concat(axis=1)`` aligns on index labels,
    # so a fresh 0..n-1 index would misalign rows (and introduce NaN rows)
    # whenever ``data`` does not carry a default RangeIndex.
    pca_df = pd.DataFrame(
        pca_result_net, columns=pca_columns, index=data.index
    )

    data = data.drop(columns=columns)  # remove the original columns
    return pd.concat([data, pca_df], axis=1)
60
 
 
61
 
62
  # Create the Gradio interface
63
  interface = gr.Interface(