Spaces:
Sleeping
Sleeping
File size: 1,870 Bytes
cec45bd 64afd26 cec45bd 64afd26 cec45bd fbb40ed cec45bd fbb40ed 64afd26 6df879d 64afd26 fbb40ed 64afd26 fbb40ed 64afd26 fbb40ed ae96213 fbb40ed 64afd26 fbb40ed 64afd26 fbb40ed 64afd26 fbb40ed 6df879d 64afd26 cec45bd fbb40ed cec45bd fbb40ed 64afd26 fbb40ed cec45bd fbb40ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# Load the dataset from 'penguins_lter.csv', which is expected to sit in the
# working directory. When the file is absent, report the problem in the app
# and stop this script run instead of surfacing a raw traceback.
try:
    df = pd.read_csv('penguins_lter.csv')
except FileNotFoundError:
    st.error("Error: 'penguins_lter.csv' not found. Please upload the file or adjust the path.")
    # Nothing below can run without the data, so end the run here.
    st.stop()
# Data preprocessing: fill missing values column by column.
#
# Each filled column is assigned back to the frame. The chained form
# `df[col].fillna(..., inplace=True)` acts on an intermediate Series: it
# triggers a FutureWarning in pandas 2.2+ and leaves `df` unchanged once
# Copy-on-Write is active.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    # Numeric gaps take the column mean.
    df[col] = df[col].fillna(df[col].mean())

# NOTE(review): every non-numeric column is imputed here, which would include
# a text label column such as 'Species' if it has gaps — confirm that filling
# missing labels with the most frequent one is intended.
categorical_cols = df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    # Non-numeric gaps take the most frequent value of the column.
    modes = df[col].mode()
    # mode() returns an empty Series when every value is missing; skip such a
    # column rather than failing on the first-element lookup.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])
# Model training and prediction.
#
# 'Species' is used as the target column. Verify it exists first so that a
# CSV with a different schema produces a readable message in the app (the
# same way a missing file is reported) rather than a KeyError traceback.
if 'Species' not in df.columns:
    st.error("Error: column 'Species' not found in 'penguins_lter.csv'.")
    st.stop()

# Features are every column except the target.
X = df.drop('Species', axis=1)
y = df['Species']

# Convert categorical features to numerical using one-hot encoding.
# drop_first=True omits the first level of each encoded column.
X = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features, then classify with 5-nearest-neighbours. Keeping
# both steps in one Pipeline means the scaler is fitted on the training split
# only and then reused unchanged when predicting.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
])

# Train the pipeline.
pipeline.fit(X_train, y_train)

# Predict labels for the held-out rows; used for the report further down.
y_pred = pipeline.predict(X_test)
# Streamlit app: page heading and a one-line description.
st.title("Penguin Species Classification")
st.write("This app predicts the species of a penguin based on its physical characteristics.")

# Display the classification report computed from the held-out labels
# (y_test) and the pipeline's predictions (y_pred), shown as plain text.
st.subheader("Classification Report")
st.text(classification_report(y_test, y_pred))

# Show the first rows of the data frame as a quick preview.
st.dataframe(df.head())