Nathanotal committed on
Commit
9f2b6d8
·
1 Parent(s): b56bc50
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +1 -64
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Iris
3
- emoji: 🐢
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
 
1
  ---
2
+ title: Titanic
3
+ emoji: 🚢
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
app.py CHANGED
@@ -14,68 +14,6 @@ import hopsworks
14
  import joblib
15
 
16
 
17
- def initialize_data(df):
18
- # df.set_index("PassengerId", inplace=True) # For debugging
19
- df.drop("PassengerId", axis=1, inplace=True)
20
-
21
- # Survived: Label
22
- # Pclass: Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
23
- # Age: Age in years
24
- # Name: The name of the passenger
25
- # Sex: male/female
26
- # SibSp: no. of siblings / spouses aboard the Titanic
27
- # Parch: no. of parents / children aboard the Titanic
28
- # Ticket: Ticket number
29
- # Fare: Passenger fare
30
- # Cabin: Cabin number
31
- # Embarked: Port of Embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)
32
-
33
- # Three columns have missing values: Age, Cabin, Embarked
34
- # Cabin has too many missing values to be useful, so we drop it -
35
- # df.drop("Cabin", axis=1, inplace=True)
36
-
37
- # Embarked has only 2 missing values so we can drop those rows
38
- df.dropna(subset=["Embarked"], inplace=True)
39
-
40
- # Age has 177 missing values, this is a lot, so we train a model to predict the age based on the other features
41
- # This model will be a simple linear regression model (see below)
42
-
43
- for row in df.itertuples():
44
- pass
45
- # Inspect the name column to extract the title of the passenger
46
- # This will be a new feature
47
- # name = row.Name
48
- # title = name.split(",")[1].split(".")[0].strip()
49
- # df.at[row.Index, "Title"] = title
50
-
51
- # Inspect the name column to extract the surname of the passenger
52
- # This will be a new feature
53
- # name = row.Name
54
- # surname = name.split(",")[0].strip()
55
- # df.at[row.Index, "Surname"] = surname
56
-
57
- # If the passenger has a cabin number, extract the cabin prefix
58
- # This will be a new feature
59
- # cabin = row.Cabin
60
- # if not pd.isnull(cabin):
61
- # if len(cabin.split(" ")) > 1: # Some have multiple cabins => take the first one
62
- # cabin = cabin.split(" ")[0]
63
- # df.at[row.Index, "Cabin"] = 'Multiple Cabin'
64
- # else:
65
- # df.at[row.Index, "Cabin"] = 'Cabin'
66
-
67
- # cabin_prefix = cabin[0]
68
- # # df.at[row.Index, "CabinClass"] = cabin_prefix
69
- # else:
70
- # # df.at[row.Index, "CabinClass"] = "XXX"
71
- # df.at[row.Index, "Cabin"] = "No Cabin"
72
-
73
- # Now we can drop the Name column
74
- df.drop(["Name", "Ticket", "Cabin"], axis=1, inplace=True)
75
-
76
- return df
77
-
78
-
79
  def prepare_for_write(df):
80
  # Convert the categorical features to numerical
81
  def sexToInt(x):
@@ -135,8 +73,7 @@ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
135
  # Create a dataframe from the input values
136
  input_variables = pd.DataFrame(
137
  [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
138
- df = initialize_data(input_variables)
139
- df = prepare_for_write(df)
140
 
141
  # Save first row as a numpy array
142
  input_list = df.iloc[0].to_numpy()
 
14
  import joblib
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def prepare_for_write(df):
18
  # Convert the categorical features to numerical
19
  def sexToInt(x):
 
73
  # Create a dataframe from the input values
74
  input_variables = pd.DataFrame(
75
  [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
76
+ df = prepare_for_write(input_variables)
 
77
 
78
  # Save first row as a numpy array
79
  input_list = df.iloc[0].to_numpy()