File size: 1,870 Bytes
cec45bd
 
 
 
64afd26
cec45bd
64afd26
cec45bd
fbb40ed
 
 
 
 
 
 
cec45bd
fbb40ed
 
64afd26
 
 
6df879d
64afd26
 
 
 
fbb40ed
 
 
 
64afd26
 
fbb40ed
 
64afd26
fbb40ed
 
ae96213
 
fbb40ed
64afd26
 
 
 
fbb40ed
 
64afd26
fbb40ed
 
64afd26
fbb40ed
6df879d
64afd26
 
cec45bd
fbb40ed
cec45bd
fbb40ed
64afd26
fbb40ed
cec45bd
fbb40ed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Read the penguin dataset from a CSV file in the working directory.
# When the file is absent, report that in the app and call st.stop() so the
# statements below are not run without a DataFrame.
csv_name = 'penguins_lter.csv'
try:
    df = pd.read_csv(csv_name)
except FileNotFoundError:
    missing_file_msg = "Error: 'penguins_lter.csv' not found. Please upload the file or adjust the path."
    st.error(missing_file_msg)
    st.stop()


# Data preprocessing: fill missing values column by column.
#
# Each filled Series is assigned back to df[col]. Calling
# fillna(inplace=True) on df[col] is chained assignment: it acts on an
# intermediate object, pandas 2.x warns about it, and under Copy-on-Write
# it leaves df unchanged.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    # Numeric gaps take the column mean.
    df[col] = df[col].fillna(df[col].mean())

categorical_cols = df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    # Non-numeric gaps take the most frequent value. mode() returns an empty
    # Series when the column has no non-missing values, so there is nothing
    # to fill with; skip that column instead of failing on the lookup.
    col_modes = df[col].mode()
    if not col_modes.empty:
        df[col] = df[col].fillna(col_modes.iloc[0])


# Model training and prediction: split off the label, encode the features,
# fit a scaled k-nearest-neighbours classifier and predict on a held-out set.

# 'Species' is used as the target; every other column becomes a feature.
raw_features = df.drop(columns='Species')
y = df['Species']

# One-hot encode the non-numeric feature columns, dropping the first level
# of each so the indicator columns are not redundant.
X = pd.get_dummies(raw_features, drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Standardise the features, then classify with the 5 nearest neighbours.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
]
pipeline = Pipeline(pipeline_steps)

# Fit on the training rows and predict labels for the held-out rows.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)


# Streamlit page: title, short description, evaluation report, data preview.
st.title("Penguin Species Classification")
st.write("This app predicts the species of a penguin based on its physical characteristics.")

# Text report comparing the held-out labels with the pipeline's predictions.
st.subheader("Classification Report")
report_text = classification_report(y_test, y_pred)
st.text(report_text)

# Show the first rows of the DataFrame as it stands after preprocessing.
preview_rows = df.head()
st.dataframe(preview_rows)