Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.metrics import classification_report | |
# Load the penguin dataset. The path is relative to the directory the app is
# launched from; replace it with your actual file path.
df = pd.read_csv('penguins_lter.csv')

# Data preprocessing: fill missing values so the model never receives NaN.
#   * numeric columns     -> column mean
#   * non-numeric columns -> most frequent value (mode)
# The filled column is assigned back to the frame rather than calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series, triggers a chained-assignment FutureWarning on pandas 2.x and
# leaves `df` unchanged once Copy-on-Write is enabled.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].mean())

categorical_cols = df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    modes = df[col].mode()
    # mode() is empty when the column has no non-null values; indexing it
    # would raise, and there is nothing to impute with, so skip the column.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])
# Feature engineering and model training.
# 'Species' is the prediction target; every remaining column is used as a
# feature. Non-numeric features are one-hot encoded, dropping the first level
# of each so the dummy columns are not redundant.
y = df['Species']
X = pd.get_dummies(df.drop(columns='Species'), drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features, then classify with 5-nearest-neighbours.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
])
pipeline.fit(X_train, y_train)

# Evaluate on the held-out rows; output_dict=True yields a nested dict that
# can be turned into a table for display.
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
# Streamlit app: page heading and a short description.
st.title("Penguin Species Classification")
st.write("This app predicts the species of a penguin based on its features.")

# Display the classification report as a table, transposed so that each
# top-level entry of the report dict becomes a row.
st.subheader("Classification Report")
report_table = pd.DataFrame(report).T
st.write(report_table)
# Sidebar inputs for the features the user can set.
st.sidebar.header("Penguin Features")

# Example input fields (replace with your actual features).
bill_length_mm = st.sidebar.number_input("Bill Length (mm)", min_value=0.0, value=40.0)
bill_depth_mm = st.sidebar.number_input("Bill Depth (mm)", min_value=0.0, value=15.0)
# ... Add more input fields for other features ...

# Collect the user inputs, keyed by feature name.
# NOTE(review): these keys only influence the prediction if they match the
# raw column names of the training CSV -- confirm against the data file.
user_input_dict = {
    'bill_length_mm': bill_length_mm,
    'bill_depth_mm': bill_depth_mm,
    # ... Add other features here
}

# One-row frame for prediction. `drop_first` is deliberately not used here:
# a single row has exactly one level per categorical column, so dropping the
# first level would erase every categorical input. Encoding all levels and
# then aligning to the training columns (below) reproduces the training
# layout, including the all-zero encoding of a dropped first level.
user_input_df = pd.get_dummies(pd.DataFrame([user_input_dict]))

if st.sidebar.button("Predict"):
    train_columns = X_train.columns

    # An input is usable if it is itself a training column or the raw source
    # of one or more dummy columns ("<name>_<level>"). Anything else would be
    # discarded by the alignment below, so tell the user instead of silently
    # predicting without it.
    ignored_inputs = [
        name for name in user_input_dict
        if name not in train_columns
        and not any(str(col).startswith(f"{name}_") for col in train_columns)
    ]
    if ignored_inputs:
        st.sidebar.warning(
            "These inputs do not match any training feature and are ignored: "
            + ", ".join(ignored_inputs)
        )

    # Align to the training layout in one step: add missing columns as 0,
    # drop unknown ones, and put the columns in the order the pipeline was
    # fitted on.
    user_input_df = user_input_df.reindex(columns=train_columns, fill_value=0)

    prediction = pipeline.predict(user_input_df)
    st.write(f"Predicted Species: {prediction[0]}")