Spaces:
Runtime error
Runtime error
Commit
·
9f2b6d8
1
Parent(s):
b56bc50
fix
Browse files
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
+
title: Titanic
|
3 |
+
emoji: 🚢
|
4 |
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
app.py
CHANGED
@@ -14,68 +14,6 @@ import hopsworks
|
|
14 |
import joblib
|
15 |
|
16 |
|
17 |
-
def initialize_data(df):
|
18 |
-
# df.set_index("PassengerId", inplace=True) # For debugging
|
19 |
-
df.drop("PassengerId", axis=1, inplace=True)
|
20 |
-
|
21 |
-
# Survived: Label
|
22 |
-
# Pclass: Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
|
23 |
-
# Age: Age in years
|
24 |
-
# Name: The name of the passenger
|
25 |
-
# Sex: male/female
|
26 |
-
# SibSp: no. of siblings / spouses aboard the Titanic
|
27 |
-
# Parch: no. of parents / children aboard the Titanic
|
28 |
-
# Ticket: Ticket number
|
29 |
-
# Fare: Passenger fare
|
30 |
-
# Cabin: Cabin number
|
31 |
-
# Embarked: Port of Embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)
|
32 |
-
|
33 |
-
# Three columns have missing values: Age, Cabin, Embarked
|
34 |
-
# Cabin has too many missing values to be useful, so we drop it -
|
35 |
-
# df.drop("Cabin", axis=1, inplace=True)
|
36 |
-
|
37 |
-
# Embarked has only 2 missing values so we can drop those rows
|
38 |
-
df.dropna(subset=["Embarked"], inplace=True)
|
39 |
-
|
40 |
-
# Age has 177 missing values, this is a lot, so we train a model to predict the age based on the other features
|
41 |
-
# This model will be a simple linear regression model (see below)
|
42 |
-
|
43 |
-
for row in df.itertuples():
|
44 |
-
pass
|
45 |
-
# Inspect the name column to extract the title of the passenger
|
46 |
-
# This will be a new feature
|
47 |
-
# name = row.Name
|
48 |
-
# title = name.split(",")[1].split(".")[0].strip()
|
49 |
-
# df.at[row.Index, "Title"] = title
|
50 |
-
|
51 |
-
# Inspect the name column to extract the surname of the passenger
|
52 |
-
# This will be a new feature
|
53 |
-
# name = row.Name
|
54 |
-
# surname = name.split(",")[0].strip()
|
55 |
-
# df.at[row.Index, "Surname"] = surname
|
56 |
-
|
57 |
-
# If the passenger has a cabin number, extract the cabin prefix
|
58 |
-
# This will be a new feature
|
59 |
-
# cabin = row.Cabin
|
60 |
-
# if not pd.isnull(cabin):
|
61 |
-
# if len(cabin.split(" ")) > 1: # Some have multiple cabins => take the first one
|
62 |
-
# cabin = cabin.split(" ")[0]
|
63 |
-
# df.at[row.Index, "Cabin"] = 'Multiple Cabin'
|
64 |
-
# else:
|
65 |
-
# df.at[row.Index, "Cabin"] = 'Cabin'
|
66 |
-
|
67 |
-
# cabin_prefix = cabin[0]
|
68 |
-
# # df.at[row.Index, "CabinClass"] = cabin_prefix
|
69 |
-
# else:
|
70 |
-
# # df.at[row.Index, "CabinClass"] = "XXX"
|
71 |
-
# df.at[row.Index, "Cabin"] = "No Cabin"
|
72 |
-
|
73 |
-
# Now we can drop the Name column
|
74 |
-
df.drop(["Name", "Ticket", "Cabin"], axis=1, inplace=True)
|
75 |
-
|
76 |
-
return df
|
77 |
-
|
78 |
-
|
79 |
def prepare_for_write(df):
|
80 |
# Convert the categorical features to numerical
|
81 |
def sexToInt(x):
|
@@ -135,8 +73,7 @@ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
|
|
135 |
# Create a dataframe from the input values
|
136 |
input_variables = pd.DataFrame(
|
137 |
[[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
|
138 |
-
df =
|
139 |
-
df = prepare_for_write(df)
|
140 |
|
141 |
# Save first row as a numpy array
|
142 |
input_list = df.iloc[0].to_numpy()
|
|
|
14 |
import joblib
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def prepare_for_write(df):
|
18 |
# Convert the categorical features to numerical
|
19 |
def sexToInt(x):
|
|
|
73 |
# Create a dataframe from the input values
|
74 |
input_variables = pd.DataFrame(
|
75 |
[[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
|
76 |
+
df = prepare_for_write(input_variables)
|
|
|
77 |
|
78 |
# Save first row as a numpy array
|
79 |
input_list = df.iloc[0].to_numpy()
|