Spaces:
Runtime error
Runtime error
Commit
·
b56bc50
1
Parent(s):
316c194
mer
Browse files- app.py +120 -20
- requirements.txt +3 -0
app.py
CHANGED
@@ -3,10 +3,107 @@ import numpy as np
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
import hopsworks
|
8 |
import joblib
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
project = hopsworks.login()
|
11 |
fs = project.get_feature_store()
|
12 |
|
@@ -23,25 +120,26 @@ df = pd
|
|
23 |
# featureLabels = features.columns
|
24 |
featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
|
25 |
"Parch", "Ticket", "Fare", "Cabin", "Embarked"]
|
|
|
|
|
|
|
|
|
|
|
26 |
|
|
|
|
|
27 |
|
28 |
-
def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Cabin, Embarked):
|
29 |
-
input_list = []
|
30 |
|
31 |
-
|
32 |
-
"male": 0,
|
33 |
-
"female": 1,
|
34 |
-
}
|
35 |
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
input_list.
|
44 |
-
input_list.append(Embarked) # Todo: Convert to feature
|
45 |
|
46 |
# 'res' is a list of predictions returned as the label.
|
47 |
res = model.predict(np.asarray(input_list).reshape(1, -1))
|
@@ -70,10 +168,12 @@ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Cabin, Embarked):
|
|
70 |
return img
|
71 |
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
77 |
|
78 |
featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
|
79 |
"Parch", "Ticket", "Fare", "Cabin", "Embarked"]
|
@@ -86,7 +186,7 @@ for feature in featureLabels:
|
|
86 |
# inputs.append(gr.Inputs.Textbox(default='text', label=feature))
|
87 |
elif feature in categoricalInputs:
|
88 |
inputs.append(gr.inputs.Dropdown(
|
89 |
-
choices=
|
90 |
else:
|
91 |
raise Exception(f'Feature: "{feature}" not found')
|
92 |
|
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
import pandas as pd
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import numpy as np
|
8 |
+
from sklearn import preprocessing
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
from sklearn.ensemble import RandomForestRegressor
|
11 |
+
|
12 |
|
13 |
import hopsworks
|
14 |
import joblib
|
15 |
|
16 |
+
|
17 |
+
def initialize_data(df):
    """Reduce a Titanic passenger frame to the columns used as features.

    Columns of the raw data set, as described by the original author:

    - PassengerId: row identifier (dropped)
    - Survived: label
    - Pclass: ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
    - Name: name of the passenger (dropped)
    - Sex: male/female
    - Age: age in years
    - SibSp: no. of siblings / spouses aboard the Titanic
    - Parch: no. of parents / children aboard the Titanic
    - Ticket: ticket number (dropped)
    - Fare: passenger fare
    - Cabin: cabin number (dropped, too many missing values to be useful)
    - Embarked: port of embarkation
      (C = Cherbourg, Q = Queenstown, S = Southampton)

    Any of the dropped columns may be absent, which is the case when the
    frame is built from the UI inputs instead of the full data set.

    Args:
        df: pandas DataFrame of passengers. It is modified in place.

    Returns:
        The same DataFrame object, without the identifier/free-text columns
        and without rows whose ``Embarked`` value is missing.
    """
    # errors="ignore" lets the same function serve both the full data set and
    # the single-row frame assembled for prediction, which never contains
    # these columns; without it pandas raises KeyError.
    df.drop(
        columns=["PassengerId", "Name", "Ticket", "Cabin"],
        errors="ignore",
        inplace=True,
    )

    # Rows without a port of embarkation are discarded rather than imputed
    # (the original author notes only two such rows exist in the data set).
    if "Embarked" in df.columns:
        df.dropna(subset=["Embarked"], inplace=True)

    # NOTE(review): the original comments describe imputing missing Age values
    # with a regression model, and deriving Title / Surname / cabin-prefix
    # features from Name and Cabin. None of that is implemented here; Age is
    # passed through untouched. Confirm whether it happens elsewhere.
    return df
|
77 |
+
|
78 |
+
|
79 |
+
def prepare_for_write(df):
    """Encode the categorical columns as integers and lower-case the headers.

    ``Sex`` is mapped male -> 0, female -> 1 and ``Embarked`` is mapped
    S -> 0, C -> 1, Q -> 2. All column names are then converted to lower
    case.

    Args:
        df: pandas DataFrame with string-valued ``Sex`` and ``Embarked``
            columns. It is modified in place.

    Returns:
        The same DataFrame object with encoded columns and lower-case names.

    Raises:
        ValueError: if ``Sex`` or ``Embarked`` holds a value outside the
            mappings above (including missing or non-string values).
    """
    sex_codes = {"male": 0, "female": 1}
    embarked_codes = {"S": 0, "C": 1, "Q": 2}

    def make_encoder(codes, label):
        """Build a converter that looks a value up in *codes* or raises."""
        def encode(value):
            try:
                return codes[value]
            except (KeyError, TypeError):
                # Format with an f-string: concatenating a non-string value
                # (NaN, int) onto the message would raise TypeError and hide
                # the real problem.
                raise ValueError(
                    f"Unsupported {label} value: {value}") from None
        return encode

    df["Sex"] = df["Sex"].apply(make_encoder(sex_codes, "sex"))
    df["Embarked"] = df["Embarked"].apply(
        make_encoder(embarked_codes, "embarked"))
    df.columns = df.columns.str.lower()
    return df
|
105 |
+
|
106 |
+
|
107 |
project = hopsworks.login()
|
108 |
fs = project.get_feature_store()
|
109 |
|
|
|
120 |
# featureLabels = features.columns
|
121 |
featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
|
122 |
"Parch", "Ticket", "Fare", "Cabin", "Embarked"]
|
123 |
+
inputs = []
|
124 |
+
numericalInputs = ["Age", "SibSp", "Parch", "Fare"]
|
125 |
+
# Maybe move cabin to categorical
|
126 |
+
worthlessInputs = ["Name", "Ticket", "Cabin"]
|
127 |
+
categoricalInputs = ["Sex", "Embarked", "Pclass"]
|
128 |
|
129 |
+
columnHeaders = ["Pclass", "Sex", "Age", "SibSp",
|
130 |
+
"Parch", "Fare", "Embarked"]
|
131 |
|
|
|
|
|
132 |
|
133 |
+
def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
|
|
|
|
|
|
|
134 |
|
135 |
+
# Create a dataframe from the input values
|
136 |
+
input_variables = pd.DataFrame(
|
137 |
+
[[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
|
138 |
+
df = initialize_data(input_variables)
|
139 |
+
df = prepare_for_write(df)
|
140 |
+
|
141 |
+
# Save first row as a numpy array
|
142 |
+
input_list = df.iloc[0].to_numpy()
|
|
|
143 |
|
144 |
# 'res' is a list of predictions returned as the label.
|
145 |
res = model.predict(np.asarray(input_list).reshape(1, -1))
|
|
|
168 |
return img
|
169 |
|
170 |
|
171 |
+
# Choices offered in the UI dropdown for each categorical feature,
# keyed by feature label.
catToInput = {
    "Sex": ["male", "female"],
    "Embarked": ["S", "C", "Q"],
    # Ticket class is 1-based in the data set (1 = 1st, 2 = 2nd, 3 = 3rd);
    # offering 0-2 made 3rd class unselectable and exposed a class that
    # does not exist.
    "Pclass": [1, 2, 3],
}
|
176 |
+
|
177 |
|
178 |
featureLabels = ["Pclass", "Name", "Sex", "Age", "SibSp",
|
179 |
"Parch", "Ticket", "Fare", "Cabin", "Embarked"]
|
|
|
186 |
# inputs.append(gr.Inputs.Textbox(default='text', label=feature))
|
187 |
elif feature in categoricalInputs:
|
188 |
inputs.append(gr.inputs.Dropdown(
|
189 |
+
choices=catToInput.get(feature), default="a", label=feature))
|
190 |
else:
|
191 |
raise Exception(f'Feature: "{feature}" not found')
|
192 |
|
requirements.txt
CHANGED
@@ -2,3 +2,6 @@ hopsworks
|
|
2 |
joblib
|
3 |
scikit-learn
|
4 |
gradio
|
|
|
|
|
|
|
|
2 |
joblib
|
3 |
scikit-learn
|
4 |
gradio
|
5 |
+
numpy
|
6 |
+
pandas
|
7 |
+
requests
|