Spaces:

Xiangliyao
/

classification-test

Sleeping

App Files Files Community

narinsak unawong committed on Nov 10, 2024

Commit

cec45bd

verified ·

1 Parent(s): d8bc07d

Upload app-penguins.py

Browse files

Files changed (1) hide show

app-penguins.py +81 -0

app-penguins.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import streamlit as st
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import accuracy_score
+# Load your data (replace with your actual data loading)
+penguins = pd.read_csv('penguins_lter.csv')
+# Data Cleaning (same as your existing code)
+penguins_cleaned = penguins.dropna()
+penguins_cleaned = penguins_cleaned.drop_duplicates()
+# Fill missing values (same as your existing code)
+numerical_cols = penguins.select_dtypes(include=['number']).columns
+penguins[numerical_cols] = penguins[numerical_cols].fillna(penguins[numerical_cols].mean())
+categorical_cols = penguins.select_dtypes(include=['object']).columns
+penguins[categorical_cols] = penguins[categorical_cols].fillna(penguins[categorical_cols].mode().iloc[0])
+# Feature Engineering and Model Training (same as your existing code)
+X = penguins.drop('Species', axis=1)
+y = penguins['Species']
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
+categorical_features = ['Island', 'Sex']
+numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
+categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
+preprocessor = ColumnTransformer(
+    transformers=[
+        ('num', numerical_transformer, numerical_features),
+        ('cat', categorical_transformer, categorical_features)
+    ])
+pipeline = Pipeline(steps=[
+    ('preprocessor', preprocessor),
+    ('classifier', KNeighborsClassifier())
+])
+pipeline.fit(X_train, y_train)
+y_pred = pipeline.predict(X_test)
+accuracy = accuracy_score(y_test, y_pred)
+# Streamlit App
+st.title("Penguin Species Classification")
+st.write("This app predicts the species of a penguin based on its features.")
+# Display the accuracy
+st.write(f"Model Accuracy: {accuracy}")
+# Input features for prediction
+culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
+culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
+flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
+body_mass = st.number_input("Body Mass (g)", min_value=0.0)
+island = st.selectbox("Island", penguins['Island'].unique())
+sex = st.selectbox("Sex", penguins['Sex'].unique())
+# Create a DataFrame for prediction
+new_penguin = pd.DataFrame({
+    'Culmen Length (mm)': [culmen_length],
+    'Culmen Depth (mm)': [culmen_depth],
+    'Flipper Length (mm)': [flipper_length],
+    'Body Mass (g)': [body_mass],
+    'Island': [island],
+    'Sex': [sex]
+    })
+# Make prediction
+if st.button("Predict Species"):
+  prediction = pipeline.predict(new_penguin)
+  st.write(f"Predicted Species: {prediction[0]}")