Spaces:

Xiangliyao
/

classification-test

Sleeping

App Files Community

narinsak unawong committed on Nov 10, 2024

Commit

6df879d

verified ·

1 Parent(s): 45bc0e2

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -44

app.py CHANGED Viewed

@@ -8,65 +8,67 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.metrics import accuracy_score
 # Load your data (replace with your actual data loading)
-# Assuming penguins.csv is in the same directory as your Streamlit app
-try:
-    penguins = pd.read_csv('penguins_lter.csv')
-except FileNotFoundError:
-    st.error("Error: penguins_lter.csv not found. Please make sure the file is in the same directory as the app.")
-    st.stop()
-# Preprocessing steps (same as your original code)
-penguins = penguins.dropna()
-penguins.drop_duplicates(inplace=True)
 # Streamlit app
-st.title('Penguin Species Prediction')
-# Sidebar for user input
-st.sidebar.header('Input Features')
-island = st.sidebar.selectbox('Island', penguins['Island'].unique())
-culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()), float(penguins['Culmen Length (mm)'].mean()))
-culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()), float(penguins['Culmen Depth (mm)'].mean()))
-flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()), float(penguins['Flipper Length (mm)'].mean()))
-body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()), float(penguins['Body Mass (g)'].mean()))
-sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
-# Create input DataFrame
 input_data = pd.DataFrame({
-    'Island': [island],
     'Culmen Length (mm)': [culmen_length],
     'Culmen Depth (mm)': [culmen_depth],
     'Flipper Length (mm)': [flipper_length],
     'Body Mass (g)': [body_mass],
     'Sex': [sex]
 })
-# Prepare the model (same as before, including your pipeline)
-X = penguins.drop('Species', axis=1)
-y = penguins['Species']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
-categorical_features = ['Island', 'Sex']
-numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
-categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
-preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', numerical_transformer, numerical_features),
-        ('cat', categorical_transformer, categorical_features)
-    ])
-pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
-pipeline.fit(X_train, y_train)
-# Make prediction
-prediction = pipeline.predict(input_data)
-# Display prediction
-st.subheader('Prediction')
-st.write(f"Predicted Penguin Species: {prediction[0]}")

 from sklearn.metrics import accuracy_score
 # Load your data (replace with your actual data loading)
+penguins = pd.read_csv('penguins_lter.csv')  # Make sure 'penguins_lter.csv' is in your app's directory or accessible
+# Data cleaning and preprocessing (same as your original code)
+penguins_cleaned = penguins.dropna()
+penguins_cleaned = penguins_cleaned.drop_duplicates()
+# Numerical and Categorical Features (same as original code)
+numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
+categorical_features = ['Island', 'Sex']
+# Preprocessing pipeline (same as original code)
+numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
+categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
+preprocessor = ColumnTransformer(transformers=[
+    ('num', numerical_transformer, numerical_features),
+    ('cat', categorical_transformer, categorical_features)
+])
+# Machine Learning pipeline (same as original code)
+pipeline = Pipeline(steps=[
+    ('preprocessor', preprocessor),
+    ('classifier', KNeighborsClassifier())
+])
 # Streamlit app
+st.title("Penguin Species Classification")
+# Display the dataset (optional)
+if st.checkbox("Show Dataset"):
+    st.write(penguins_cleaned)
+# User input features
+st.header("Enter Penguin Features:")
+culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
+culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
+flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
+body_mass = st.number_input("Body Mass (g)", min_value=0.0)
+island = st.selectbox("Island", penguins_cleaned['Island'].unique())
+sex = st.selectbox("Sex", penguins_cleaned['Sex'].unique())
+# Create a dataframe for the input
 input_data = pd.DataFrame({
     'Culmen Length (mm)': [culmen_length],
     'Culmen Depth (mm)': [culmen_depth],
     'Flipper Length (mm)': [flipper_length],
     'Body Mass (g)': [body_mass],
+    'Island': [island],
     'Sex': [sex]
 })
+# Make Prediction
+if st.button('Predict'):
+  # Assuming 'species' is your target variable (same as original code)
+  X = penguins_cleaned.drop('Species', axis=1)
+  y = penguins_cleaned['Species']
+  # Fit the model
+  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+  pipeline.fit(X_train, y_train)
+  prediction = pipeline.predict(input_data)
+  st.write(f"Predicted Species: {prediction[0]}")