Spaces:
Sleeping
Sleeping
File size: 2,947 Bytes
cec45bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load the raw penguin measurements. The path is relative to the directory
# the app is launched from.
penguins = pd.read_csv('penguins_lter.csv')

# Missing values are imputed rather than dropped, so every row of the CSV is
# kept. An earlier dropna()/drop_duplicates() copy was computed here but never
# used by anything downstream; it has been removed so the code matches what
# the model actually sees.

# Numeric columns: replace NaN with that column's mean.
numerical_cols = penguins.select_dtypes(include=['number']).columns
penguins[numerical_cols] = penguins[numerical_cols].fillna(penguins[numerical_cols].mean())

# Text (object-dtype) columns: replace NaN with that column's most frequent
# value. mode() can return several rows on ties; .iloc[0] takes the first.
# NOTE(review): if 'Species' is an object column it is filled here too, which
# would invent a label for unlabeled rows - confirm that is acceptable.
# NOTE(review): these statistics come from the full dataset, before the
# train/test split further down, so the reported accuracy may be slightly
# optimistic.
categorical_cols = penguins.select_dtypes(include=['object']).columns
penguins[categorical_cols] = penguins[categorical_cols].fillna(penguins[categorical_cols].mode().iloc[0])
# ---------------------------------------------------------------------------
# Model definition, training and evaluation
# ---------------------------------------------------------------------------

# Columns the model learns from, grouped by how they are preprocessed.
numerical_features = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
categorical_features = ['Island', 'Sex']

# Measurements are standardized; categories are one-hot encoded, with
# categories not seen during fit ignored at predict time.
numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Only the columns named above are routed to a transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Preprocessing and a default k-nearest-neighbours classifier in one estimator.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', KNeighborsClassifier()),
    ]
)

# Target is the species label; all remaining columns are passed along as X.
y = penguins['Species']
X = penguins.drop(columns='Species')

# Hold out 20% of the rows for evaluation; the seed is fixed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline.fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# ---------------------------------------------------------------------------
# Streamlit user interface
# ---------------------------------------------------------------------------
st.title("Penguin Species Classification")
st.write("This app predicts the species of a penguin based on its features.")

# Show the held-out accuracy of the trained pipeline.
st.write(f"Model Accuracy: {accuracy}")

# Collect one value per feature. Each widget label doubles as the column name
# in the single-row frame handed to the pipeline, so the two stay in sync.
measurement_labels = (
    "Culmen Length (mm)",
    "Culmen Depth (mm)",
    "Flipper Length (mm)",
    "Body Mass (g)",
)
feature_values = {}

# Numeric measurements: non-negative number inputs.
for label in measurement_labels:
    feature_values[label] = [st.number_input(label, min_value=0.0)]

# Categorical features: choices are the distinct values present in the data.
for label in ("Island", "Sex"):
    feature_values[label] = [st.selectbox(label, penguins[label].unique())]

# Single-row DataFrame describing the penguin to classify.
new_penguin = pd.DataFrame(feature_values)

# Run the prediction only when the user presses the button.
if st.button("Predict Species"):
    predicted_species = pipeline.predict(new_penguin)[0]
    st.write(f"Predicted Species: {predicted_species}")
|