Spaces:

Nathanotal
/

titanic

Runtime error

App Files Files Community

Nathanotal committed on Nov 16, 2022

Commit

9f2b6d8

1 Parent(s): b56bc50

fix

Browse files

Files changed (2) hide show

README.md +2 -2
app.py +1 -64

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Iris
-emoji: 🐢
 colorFrom: purple
 colorTo: green
 sdk: gradio

 ---
+title: Titanic
+emoji: 🚢
 colorFrom: purple
 colorTo: green
 sdk: gradio

app.py CHANGED Viewed

@@ -14,68 +14,6 @@ import hopsworks
 import joblib
-def initialize_data(df):
-    # df.set_index("PassengerId", inplace=True) # For debugging
-    df.drop("PassengerId", axis=1, inplace=True)
-    # Survived: Label
-    # Pclass: Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
-    # Age: Age in years
-    # Name: The name of the passenger
-    # Sex: male/female
-    # SibSp: no. of siblings / spouses aboard the Titanic
-    # Parch: no. of parents / children aboard the Titanic
-    # Ticket: Ticket number
-    # Fare: Passenger fare
-    # Cabin: Cabin number
-    # Embarked: Port of Embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)
-    # Three columns have missing values: Age, Cabin, Embarked
-    # Cabin has too many missing values to be useful, so we drop it -
-    # df.drop("Cabin", axis=1, inplace=True)
-    # Embarked has only 2 missing values so we can drop those rows
-    df.dropna(subset=["Embarked"], inplace=True)
-    # Age has 177 missing values, this is a lot, so we train a model to predict the age based on the other features
-    # This model will be a simple linear regression model (see below)
-    for row in df.itertuples():
-        pass
-        # Inspect the name column to extract the title of the passenger
-        # This will be a new feature
-        # name = row.Name
-        # title = name.split(",")[1].split(".")[0].strip()
-        # df.at[row.Index, "Title"] = title
-        # Inspect the name column to extract the surname of the passenger
-        # This will be a new feature
-        # name = row.Name
-        # surname = name.split(",")[0].strip()
-        # df.at[row.Index, "Surname"] = surname
-        # If the passenger has a cabin number, extract the cabin prefix
-        # This will be a new feature
-        # cabin = row.Cabin
-        # if not pd.isnull(cabin):
-        #     if len(cabin.split(" ")) > 1: # Some have multiple cabins => take the first one
-        #         cabin = cabin.split(" ")[0]
-        #         df.at[row.Index, "Cabin"] = 'Multiple Cabin'
-        #     else:
-        #         df.at[row.Index, "Cabin"] = 'Cabin'
-        #     cabin_prefix = cabin[0]
-        #     # df.at[row.Index, "CabinClass"] = cabin_prefix
-        # else:
-        #     # df.at[row.Index, "CabinClass"] = "XXX"
-        #     df.at[row.Index, "Cabin"] = "No Cabin"
-    # Now we can drop the Name column
-    df.drop(["Name", "Ticket", "Cabin"], axis=1, inplace=True)
-    return df
 def prepare_for_write(df):
     # Convert the categorical features to numerical
     def sexToInt(x):
@@ -135,8 +73,7 @@ def titanic(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
     # Create a dataframe from the input values
     input_variables = pd.DataFrame(
         [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
-    df = initialize_data(input_variables)
-    df = prepare_for_write(df)
     # Save first row as a numpy array
     input_list = df.iloc[0].to_numpy()

 import joblib
 def prepare_for_write(df):
     # Convert the categorical features to numerical
     def sexToInt(x):
     # Create a dataframe from the input values
     input_variables = pd.DataFrame(
         [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]], columns=columnHeaders)
+    df = prepare_for_write(input_variables)
     # Save first row as a numpy array
     input_list = df.iloc[0].to_numpy()