Spaces:
Sleeping
Sleeping
File size: 2,610 Bytes
cec45bd 64afd26 cec45bd 64afd26 cec45bd 64afd26 cec45bd 64afd26 6df879d 64afd26 ae96213 64afd26 6df879d 64afd26 cec45bd 64afd26 cec45bd 64afd26 cec45bd 64afd26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# Load the dataset (replace with your actual file path). The path is relative
# to the working directory the app is launched from.
df = pd.read_csv('penguins_lter.csv')

# Data preprocessing: fill missing values column by column.
# Numeric columns get the column mean.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    # Assign the result back instead of calling fillna(inplace=True) on
    # df[col]: the in-place form operates on an intermediate object, is
    # deprecated in pandas 2.x and leaves df unchanged under Copy-on-Write.
    df[col] = df[col].fillna(df[col].mean())

# Non-numeric columns get the most frequent value.
categorical_cols = df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    modes = df[col].mode()
    # mode() is empty when every value in the column is missing; there is
    # nothing to impute with, so leave such a column as it is.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])
# Feature engineering and model training.
# 'Species' is the target; every other column is a predictor. Non-numeric
# predictors are one-hot encoded, dropping the first level of each.
y = df['Species']
X = pd.get_dummies(df.drop(columns=['Species']), drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features, then classify with a 5-nearest-neighbours model.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=5)),
    ]
)
pipeline.fit(X_train, y_train)

# Evaluate on the held-out rows; the dict form is rendered as a table later.
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
# Streamlit app: page heading and a short description.
st.title("Penguin Species Classification")
st.write("This app predicts the species of a penguin based on its features.")

# Show the held-out classification report as a table, transposed so that
# each report entry becomes a row.
st.subheader("Classification Report")
st.write(pd.DataFrame(report).T)
# Sidebar input fields for the penguin to classify (example).
st.sidebar.header("Penguin Features")
# Example input fields (replace with your actual features)
bill_length_mm = st.sidebar.number_input("Bill Length (mm)", min_value=0.0, value=40.0)
bill_depth_mm = st.sidebar.number_input("Bill Depth (mm)", min_value=0.0, value=15.0)
# ... Add more input fields for other features ...

# Collect the user inputs, keyed by feature name.
# NOTE(review): a key only reaches the model if it matches a column name in
# X_train exactly -- confirm these names against the CSV header.
user_input_dict = {
    'bill_length_mm': bill_length_mm,
    'bill_depth_mm': bill_depth_mm,
    # ... Add other features here
}

# Build a single-row frame for prediction.
user_input_df = pd.DataFrame([user_input_dict])
# Remember the numeric inputs before encoding: they pass through get_dummies
# unchanged, so their names must appear verbatim among the training columns.
numeric_input_cols = user_input_df.select_dtypes(include=['number']).columns
# Encode WITHOUT drop_first. A one-row frame has a single level per
# categorical column, and drop_first=True would drop exactly that level, so
# no categorical input could ever reach the model. The reindex below discards
# the baseline level that training dropped.
user_input_df = pd.get_dummies(user_input_df)

if st.sidebar.button("Predict"):
    # Surface inputs that would otherwise be silently discarded when the
    # frame is aligned to the training columns.
    ignored = [str(c) for c in numeric_input_cols if c not in X_train.columns]
    if ignored:
        st.warning(
            "These inputs do not match any training feature and are ignored: "
            + ", ".join(ignored)
        )

    # Align with the training layout in one step: same columns, same order,
    # and 0 for every feature the user did not supply.
    # NOTE(review): 0 is a raw, unscaled value here; for numeric features it
    # may lie far from the training range -- confirm this default is intended.
    user_input_df = user_input_df.reindex(columns=X_train.columns, fill_value=0)

    prediction = pipeline.predict(user_input_df)
    st.write(f"Predicted Species: {prediction[0]}")