# --- Hosting-page metadata captured with the file (not part of the program) ---
# narinsak unawong
# Update app.py
# ae96213 verified
# raw
# history blame
# 2.74 kB
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# 1. Load Data
# The dataset is expected next to this script as 'penguins_lter.csv'.
penguins = pd.read_csv('penguins_lter.csv')

# Only the target and the columns the model actually consumes need to be
# complete. A blanket dropna() would also throw away every row that has an
# empty value in a column the model never reads, which can shrink the
# training set far more than intended.
required_columns = [
    'Species',
    'Island',
    'Sex',
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
penguins = penguins.dropna(subset=required_columns)  # Handle missing values
penguins = penguins.drop_duplicates()  # Remove fully identical rows
# NOTE(review): if the CSV uses placeholder strings (e.g. '.') for unknown
# categorical values, those are not NaN and survive the filter above --
# confirm against the data and filter explicitly if needed.

# 2. Define Features and Target
# X keeps every non-target column; the preprocessing step downstream picks
# the columns it needs by name.
X = penguins.drop('Species', axis=1)
y = penguins['Species']

# 3. Split Data
# Fixed random_state keeps the 80/20 split reproducible across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# 4. Create Preprocessing Pipeline
# Numeric measurements go through a StandardScaler; the categorical columns
# go through a OneHotEncoder configured with handle_unknown='ignore' so that
# a category unseen during fitting does not raise at predict time.
numerical_features = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
categorical_features = ['Island', 'Sex']

numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Each entry is (name, transformer, columns it applies to).
column_steps = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)

# 5. Create and Train Model Pipeline
# Preprocessing and a KNeighborsClassifier (default settings) are chained so
# that the same transformations are applied at fit and predict time.
model_steps = [
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier()),
]
pipeline = Pipeline(steps=model_steps)
pipeline.fit(X_train, y_train)
# 6. Streamlit App
st.title('Penguin Species Prediction')


def _measurement_slider(column):
    """Render a sidebar slider for *column* bounded by its min and max in `penguins`."""
    observed = penguins[column]
    lower = float(observed.min())
    upper = float(observed.max())
    return st.sidebar.slider(column, lower, upper)


# 6.1 Sidebar for User Input
# Widgets are created in the same order they should appear in the sidebar.
st.sidebar.header('Input Features')
island_choices = penguins['Island'].unique()
island = st.sidebar.selectbox('Island', island_choices)
culmen_length = _measurement_slider('Culmen Length (mm)')
culmen_depth = _measurement_slider('Culmen Depth (mm)')
flipper_length = _measurement_slider('Flipper Length (mm)')
body_mass = _measurement_slider('Body Mass (g)')
sex_choices = penguins['Sex'].unique()
sex = st.sidebar.selectbox('Sex', sex_choices)

# 6.2 Create Input Dataframe
# A single-row frame whose column names match the ones the pipeline was
# fitted on.
user_row = {
    'Island': island,
    'Culmen Length (mm)': culmen_length,
    'Culmen Depth (mm)': culmen_depth,
    'Flipper Length (mm)': flipper_length,
    'Body Mass (g)': body_mass,
    'Sex': sex,
}
input_data = pd.DataFrame({column: [value] for column, value in user_row.items()})

# 6.3 Make Prediction
prediction = pipeline.predict(input_data)

# 6.4 Display Prediction
st.subheader('Prediction')
st.write(f"Predicted Penguin Species: {prediction[0]}")