Nathanotal committed on
Commit
b56bc50
·
1 Parent(s): 316c194
Files changed (2) hide show
  1. app.py +120 -20
  2. requirements.txt +3 -0
app.py CHANGED
@@ -3,10 +3,107 @@ import numpy as np
3
  from PIL import Image
4
  import requests
5
  import pandas as pd
 
 
 
 
 
 
6
 
7
  import hopsworks
8
  import joblib
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  project = hopsworks.login()
11
  fs = project.get_feature_store()
12
 
@@ -23,25 +120,26 @@ df = pd
23
  # featureLabels = features.columns
24
  featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
25
  "Parch", "Ticket", "Fare", "Cabin", "Embarked"]
 
 
 
 
 
26
 
 
 
27
 
28
- def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Cabin, Embarked):
29
- input_list = []
30
 
31
- sexToFeature = {
32
- "male": 0,
33
- "female": 1,
34
- }
35
 
36
- # Convert inputs to features
37
- input_list.append(Pclass) # Todo: Convert to feature
38
- input_list.append(sexToFeature.get(Sex)) # Todo: Convert to feature
39
- input_list.append(Age) # !
40
- input_list.append(SibSp) # Todo: Convert to feature
41
- input_list.append(Parch) # Todo: Convert to feature
42
- input_list.append(Fare) # Todo: Convert to feature
43
- input_list.append(Cabin) # Todo: Convert to feature
44
- input_list.append(Embarked) # Todo: Convert to feature
45
 
46
  # 'res' is a list of predictions returned as the label.
47
  res = model.predict(np.asarray(input_list).reshape(1, -1))
@@ -70,10 +168,12 @@ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Cabin, Embarked):
70
  return img
71
 
72
 
73
- inputs = []
74
- numericalInputs = ["Age", "SibSp", "Parch", "Fare", "Pclass"]
75
- worthlessInputs = ["Name", "Ticket"]
76
- categoricalInputs = ["Sex", "Embarked", "Cabin"]
 
 
77
 
78
  featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
79
  "Parch", "Ticket", "Fare", "Cabin", "Embarked"]
@@ -86,7 +186,7 @@ for feature in featureLabels:
86
  # inputs.append(gr.Inputs.Textbox(default='text', label=feature))
87
  elif feature in categoricalInputs:
88
  inputs.append(gr.inputs.Dropdown(
89
- choices=["a", "b"], default="a", label=feature))
90
  else:
91
  raise Exception(f'Feature: "{feature}" not found')
92
 
 
3
  from PIL import Image
4
  import requests
5
  import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ from sklearn import preprocessing
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.ensemble import RandomForestRegressor
11
+
12
 
13
  import hopsworks
14
  import joblib
15
 
16
+
17
def initialize_data(df):
    """Remove the columns the model does not use and rows with no port.

    The function accepts both the raw passenger table and a frame that has
    already been reduced to the model inputs (for example the single-row
    frame built by the prediction UI). Columns that are not present are
    skipped instead of raising.

    Column reference for the raw table:
        Survived: Label
        Pclass:   Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
        Age:      Age in years
        Name:     The name of the passenger
        Sex:      male/female
        SibSp:    no. of siblings / spouses aboard the Titanic
        Parch:    no. of parents / children aboard the Titanic
        Ticket:   Ticket number
        Fare:     Passenger fare
        Cabin:    Cabin number
        Embarked: Port of Embarkation
                  (C = Cherbourg, Q = Queenstown, S = Southampton)

    Args:
        df: pandas DataFrame of passengers. It is modified in place.

    Returns:
        The same DataFrame object, without the ``PassengerId``, ``Name``,
        ``Ticket`` and ``Cabin`` columns and without the rows whose
        ``Embarked`` value is missing.

    Raises:
        KeyError: if ``df`` has no ``Embarked`` column (raised by pandas).
    """
    # errors="ignore" matters: a frame that only holds the model inputs has
    # none of these columns, and a plain drop() would raise KeyError on it.
    df.drop(
        columns=["PassengerId", "Name", "Ticket", "Cabin"],
        errors="ignore",
        inplace=True,
    )

    # Rows without a port of embarkation are discarded rather than imputed.
    df.dropna(subset=["Embarked"], inplace=True)

    # NOTE: missing Age values are not handled here; they pass through as-is.
    # TODO: possible extra features that were sketched but never enabled:
    #   - the passenger's title and surname, parsed from Name
    #   - a cabin indicator (no cabin / one cabin / multiple cabins) or the
    #     cabin prefix letter, parsed from Cabin
    return df
77
+
78
+
79
def prepare_for_write(df):
    """Encode the categorical columns as integers and lower-case the headers.

    ``Sex`` becomes 0 for "male" and 1 for "female"; ``Embarked`` becomes
    0 for "S", 1 for "C" and 2 for "Q". All column names are then converted
    to lower case.

    Args:
        df: pandas DataFrame with ``Sex`` and ``Embarked`` columns holding
            the raw string values. It is modified in place.

    Returns:
        The same DataFrame object with both columns encoded and every column
        name lower-cased.

    Raises:
        ValueError: if a ``Sex`` or ``Embarked`` cell holds anything other
            than the values listed above (including missing values).
        KeyError: if either column is absent (raised by pandas).
    """
    sex_codes = {"male": 0, "female": 1}
    embarked_codes = {"S": 0, "C": 1, "Q": 2}

    def encode(value, codes, label):
        # The value is formatted, not concatenated, so that non-string cells
        # (NaN, numbers) produce this error rather than a TypeError.
        try:
            return codes[value]
        except (KeyError, TypeError):
            raise ValueError(f"Unsupported {label} value: {value}") from None

    # Encode both columns before assigning either one, so an invalid value
    # cannot leave the frame half-converted.
    sex = df["Sex"].apply(lambda value: encode(value, sex_codes, "sex"))
    embarked = df["Embarked"].apply(
        lambda value: encode(value, embarked_codes, "embarked"))

    df["Sex"] = sex
    df["Embarked"] = embarked
    df.columns = df.columns.str.lower()
    return df
105
+
106
+
107
  project = hopsworks.login()
108
  fs = project.get_feature_store()
109
 
 
120
  # featureLabels = features.columns
121
  featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
122
  "Parch", "Ticket", "Fare", "Cabin", "Embarked"]
123
+ inputs = []
124
+ numericalInputs = ["Age", "SibSp", "Parch", "Fare"]
125
+ # Maybe move cabin to categorical
126
+ worthlessInputs = ["Name", "Ticket", "Cabin"]
127
+ categoricalInputs = ["Sex", "Embarked", "Pclass"]
128
 
129
+ columnHeaders = ["Pclass", "Sex", "Age", "SibSp",
130
+ "Parch", "Fare", "Embarked"]
131
 
 
 
132
 
133
+ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
 
 
 
134
 
135
+ # Create a dataframe from the input values
136
+ input_variables = pd.DataFrame(
137
+ [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
138
+ df = initialize_data(input_variables)
139
+ df = prepare_for_write(df)
140
+
141
+ # Save first row as a numpy array
142
+ input_list = df.iloc[0].to_numpy()
 
143
 
144
  # 'res' is a list of predictions returned as the label.
145
  res = model.predict(np.asarray(input_list).reshape(1, -1))
 
168
  return img
169
 
170
 
171
# Choices offered for each categorical input, keyed by feature label.
catToInput = {
    "Sex": ["male", "female"],
    "Embarked": ["S", "C", "Q"],
    # Ticket classes are 1 = 1st, 2 = 2nd, 3 = 3rd; the value is passed on
    # without re-encoding, so the choices must use the same numbering.
    "Pclass": [1, 2, 3],
}
176
+
177
 
178
  featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
179
  "Parch", "Ticket", "Fare", "Cabin", "Embarked"]
 
186
  # inputs.append(gr.Inputs.Textbox(default='text', label=feature))
187
  elif feature in categoricalInputs:
188
  inputs.append(gr.inputs.Dropdown(
189
+ choices=catToInput.get(feature), default="a", label=feature))
190
  else:
191
  raise Exception(f'Feature: "{feature}" not found')
192
 
requirements.txt CHANGED
@@ -2,3 +2,6 @@ hopsworks
2
  joblib
3
  scikit-learn
4
  gradio
 
 
 
 
2
  joblib
3
  scikit-learn
4
  gradio
5
+ numpy
6
+ pandas
7
+ requests