import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load your data (replace with your actual data loading)
# Assuming penguins_lter.csv is in the same directory as your Streamlit app
try:
    penguins = pd.read_csv('penguins_lter.csv')
except FileNotFoundError:
    st.error("Error: penguins_lter.csv not found. Please make sure the file is in the same directory as the app.")
    st.stop()
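# Note: Streamlit reruns this whole script on every widget interaction, so for a
# larger file the read above could be wrapped in a function decorated with
# @st.cache_data; the penguins CSV is small enough that reloading it is fine.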
# Preprocessing (adapted from your original code). Restrict to the modelling
# columns first: the raw penguins_lter.csv has sparsely filled columns such as
# Comments, so dropna() on the full frame would remove nearly every row. This
# also keeps the training columns aligned with the sidebar input built below.
model_columns = ['Species', 'Island', 'Culmen Length (mm)', 'Culmen Depth (mm)',
                 'Flipper Length (mm)', 'Body Mass (g)', 'Sex']
penguins = penguins[model_columns].dropna()
penguins = penguins.drop_duplicates()
# Streamlit app
st.title('Penguin Species Prediction')
# Sidebar for user input
st.sidebar.header('Input Features')
island = st.sidebar.selectbox('Island', penguins['Island'].unique())
culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()), float(penguins['Culmen Length (mm)'].mean()))
culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()), float(penguins['Culmen Depth (mm)'].mean()))
flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()), float(penguins['Flipper Length (mm)'].mean()))
body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()), float(penguins['Body Mass (g)'].mean()))
sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
# Create a single-row DataFrame from the sidebar selections
input_data = pd.DataFrame({
    'Island': [island],
    'Culmen Length (mm)': [culmen_length],
    'Culmen Depth (mm)': [culmen_depth],
    'Flipper Length (mm)': [flipper_length],
    'Body Mass (g)': [body_mass],
    'Sex': [sex]
})
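# Optional addition (not in the original code): echo the selected inputs back to
# the page so it is clear what the prediction below is based on.
st.subheader('Selected Input')
st.write(input_data)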
# Prepare the model (same as before, including your pipeline)
X = penguins.drop('Species', axis=1)
y = penguins['Species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
categorical_features = ['Island', 'Sex']
numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
pipeline.fit(X_train, y_train)
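# Optional addition (not in the original code): report hold-out accuracy using
# the accuracy_score import above, to give a rough sense of model reliability.
accuracy = accuracy_score(y_test, pipeline.predict(X_test))
st.sidebar.write(f"Test-set accuracy: {accuracy:.2f}")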
# Make prediction
prediction = pipeline.predict(input_data)
# Display prediction
st.subheader('Prediction')
st.write(f"Predicted Penguin Species: {prediction[0]}")